QuantumSavory
diff --git a/‎benchmark/KernelAbstractions/README.md‎
Lines changed: 1 addition & 1 deletion b/‎benchmark/KernelAbstractions/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark/KernelAbstractions/benchmark_platform_CUDA.jl‎
Lines changed: 5 additions & 2 deletions b/‎benchmark/KernelAbstractions/benchmark_platform_CUDA.jl‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎benchmark/KernelAbstractions/benchmark_platform_OpenCL.jl‎
Lines changed: 5 additions & 2 deletions b/‎benchmark/KernelAbstractions/benchmark_platform_OpenCL.jl‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎benchmark/KernelAbstractions/benchmark_platform_ROCm.jl‎
Lines changed: 5 additions & 2 deletions b/‎benchmark/KernelAbstractions/benchmark_platform_ROCm.jl‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎benchmark/KernelAbstractions/implementation/benchmark_KA_mul_leftright.jl‎
Lines changed: 0 additions & 195 deletions b/‎benchmark/KernelAbstractions/implementation/benchmark_KA_mul_leftright.jl‎
Lines changed: 0 additions & 195 deletions
diff --git a/‎benchmark/KernelAbstractions/implementation/benchmark_platform.jl‎
Lines changed: 14 additions & 4 deletions b/‎benchmark/KernelAbstractions/implementation/benchmark_platform.jl‎
Lines changed: 14 additions & 4 deletions
diff --git a/‎benchmark/KernelAbstractions/implementation/definitions.jl‎
Lines changed: 5 additions & 23 deletions b/‎benchmark/KernelAbstractions/implementation/definitions.jl‎
Lines changed: 5 additions & 23 deletions
diff --git a/‎benchmark/KernelAbstractions/implementation/definitions/benchmark_configuration.jl‎
Lines changed: 54 additions & 0 deletions b/‎benchmark/KernelAbstractions/implementation/definitions/benchmark_configuration.jl‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎benchmark/KernelAbstractions/implementation/definitions/plot_configuration.jl‎
Lines changed: 13 additions & 0 deletions b/‎benchmark/KernelAbstractions/implementation/definitions/plot_configuration.jl‎
Lines changed: 13 additions & 0 deletions
@@ -1,6 +1,6 @@
 # Usage Instructions
 
-1. Modify the parameters listed in `implementation/definitions.jl` as desired, keeping in mind the limitations of the available device memory and the extent of the benchmark runtime.
+1. Modify the values listed in `implementation/definitions/[benchmark_configuration, tuning_parameters].jl` as desired, keeping in mind the limitations of the available device memory and the extent of the benchmark runtime.
 2. Ensure that all the packages listed in `implementation/imports.jl` are installed as this is not handled automatically.
 3. Ensure that the backend package(s) listed in the pertinent `benchmark_platform_*.jl` script are properly set up and configured.
 4. Pass said script as an argument to julia (optionally, also set the number of executing host threads) and wait for the benchmark to conclude.
 
@@ -1,13 +1,16 @@
+
+#=============================================================================#
 include("implementation/benchmark_platform.jl")
 
 using CUDA: CuArray, devices, synchronize
 const AT = CuArray
-const path = "QuantumClifford_benchmarks/CUDA"
+const path = "benchmarks/QuantumCliffordKAExt/CUDA"
 
 const can_run = length(devices()) > 0
 
 if can_run
-    benchmark_platform(AT, synchronize, path)
+    benchmark_platform(synchronize, AT, path)
 else
     @info "Unable to run CUDA benchmark. No suitable device was found."
 end
+#=============================================================================#
@@ -1,15 +1,18 @@
+
+#=============================================================================#
 include("implementation/benchmark_platform.jl")
 
 import pocl_jll
 using OpenCL: CLArray, cl.devices, cl.platforms, cl.finish, cl.queue
 const AT = CLArray
-const path = "QuantumClifford_benchmarks/OpenCL"
+const path = "benchmarks/QuantumCliffordKAExt/OpenCL"
 
 const can_run = any(length(devices(platform)) > 0 for platform in platforms())
 
 if can_run
     synchronize() = finish(queue())
-    benchmark_platform(AT, synchronize, path)
+    benchmark_platform(synchronize, AT, path)
 else
     @info "Unable to run OpenCL benchmark. No suitable device was found."
 end
+#=============================================================================#
@@ -1,13 +1,16 @@
+
+#=============================================================================#
 include("implementation/benchmark_platform.jl")
 
 using AMDGPU: ROCArray, devices, synchronize
 const AT = ROCArray
-const path = "QuantumClifford_benchmarks/ROCm"
+const path = "benchmarks/QuantumCliffordKAExt/ROCm"
 
 const can_run = length(devices()) > 0
 
 if can_run
-    benchmark_platform(AT, synchronize, path)
+    benchmark_platform(synchronize, AT, path)
 else
     @info "Unable to run ROCm benchmark. No suitable device was found."
 end
+#=============================================================================#
@@ -1,10 +1,20 @@
+
+#=============================================================================#
 include("imports.jl")
 include("definitions.jl")
 include("utilities.jl")
-include("benchmark_KA_mul_leftright.jl")
 
-@inline function benchmark_platform(AT, synchronize, path)
+include("suites/benchmark_KA_mul.jl")
+include("suites/benchmark_KA_canonicalization.jl")
+
+@inline function benchmark_platform(synchronize, AT, path)::Nothing
+    # BenchmarkTools evaluates the setup block at the global scope.
+    global cache = AllocCache()
     path *= "/" * string(value(now()) - UNIXEPOCH)
-    benchmark_KA_mul_leftright(AT, synchronize, path; phases = Val(true))
-    benchmark_KA_mul_leftright(AT, synchronize, path; phases = Val(false))
+
+    benchmark_KA_mul(synchronize, AT, path)
+    benchmark_KA_canonicalization(synchronize, AT, path)
+
+    return nothing
 end
+#=============================================================================#
@@ -1,24 +1,6 @@
-# (La)TeX hates SVG but the Plots package has issues with transparent PDFs.
-const format = "svg"
 
-# BenchmarkTools parameters.
-# Evaluations per sample point.
-const evals = 16
-# Maximum number of samples.
-const samples = 2^10
-# Maximum runtime for each sample group.
-const seconds = 60
-
-# By definition, (unsigned) char is the smallest addressable unit of memory.
-const MiB = 1024 * 1024 * count_zeros(zero(Cuchar))
-# Avoid consuming too many resources, 1 GiB is plenty.
-const n_MiB = [2^i for i = 1:10]
-# TODO: Keep these or remove them now that a good default has been set?
-const batch_sizes = [1, 4, 8, 16, 32, 64]
-
-# These values originate from a package extension, hence the query.
-const KAExt = Base.get_extension(QuantumClifford, :QuantumCliffordKAExt)
-const default_phases = KAExt.default_phases
-const default_primary_axis = KAExt.default_primary_axis
-const default_block_size = KAExt.default_block_size
-const default_batch_size = KAExt.default_batch_size
+#=============================================================================#
+include("definitions/benchmark_configuration.jl")
+include("definitions/plot_configuration.jl")
+include("definitions/tuning_parameters.jl")
+#=============================================================================#
@@ -0,0 +1,54 @@
+
+#=============================================================================#
+
+#==============================================================================
+BENCHMARK TOOLS
+==============================================================================#
+
+# CAUTION: Functions mutate their arguments, which induces disparity between runs.
+# Evaluations per sample point.
+const evals = 1
+
+# Maximum number of sample points.
+const samples = 2^14
+
+# Maximum runtime for each trial.
+const seconds = 60
+
+#==============================================================================
+SAMPLE EXTRAPOLATION
+==============================================================================#
+
+# Maximum sampling period before being aborted and extrapolated instead.
+const extrapolation_threshold = seconds << 1
+
+# Whether to include the aborted run in the data set used for extrapolation.
+const include_threshold_point = false
+
+# In the absence of sufficient data points for a fit, perform O(n^k) scaling.
+const host_permit_simple_scaling = true
+const device_permit_simple_scaling = true
+
+#==============================================================================
+PROBLEM SIZE
+==============================================================================#
+
+# By definition, (unsigned) char is the smallest addressable unit of memory.
+const MiB = 1024 * 1024 * count_zeros(zero(Cuchar))
+
+# Avoid consuming too many resources, 1 GiB is plenty.
+const sizes_MiB = [2^i for i in 1 : 10]
+
+#==============================================================================
+TUNING PARAMETERS
+==============================================================================#
+
+const benchmark_primary_axis = true
+
+const benchmark_phases = true
+
+# TODO: Enable this by default once the POCL code generation bugs are fixed.
+const benchmark_block_size = false
+
+const benchmark_batch_size = true
+#=============================================================================#
@@ -0,0 +1,13 @@
+
+#=============================================================================#
+# The output should be both stylish and informative.
+const plot_style = Dict(
+    :xticks => sizes_MiB,
+    :xscale => :log2,
+    :shape => :circle,
+    :background_color => :transparent
+    )
+
+# (La)TeX hates SVG but the Plots package has issues with transparent PDFs.
+const file_format = "svg"
+#=============================================================================#