Skip to content

Commit ce32a1c

Browse files
Implement canonicalize and canonicalize_rref routines.
1 parent 9fc98a3 commit ce32a1c

36 files changed

+2354
-634
lines changed

benchmark/KernelAbstractions/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
2. Ensure that all the packages listed in `implementation/imports.jl` are installed as this is not handled automatically.
55
3. Ensure that the backend package(s) listed in the pertinent `benchmark_platform_*.jl` script are properly set up and configured.
66
4. Pass said script as an argument to julia (optionally, also set the number of executing host threads) and wait for the benchmark to conclude.
7-
5. Navigate to the newly created directory in order to inspect the results.
7+
5. Navigate to the newly created directory (hierarchy) and find the matching platform/timestamp pair in order to inspect the results.
88

99
# Noteworthy Details
1010

benchmark/KernelAbstractions/benchmark_platform_CUDA.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
include("implementation/benchmark_platform.jl")
22

3-
using Dates: value, now, UNIXEPOCH
43
using CUDA: CuArray, devices, synchronize
54
const AT = CuArray
6-
const path = "CUDA_benchmark_" * string(value(now()) - UNIXEPOCH)
5+
const path = "QuantumClifford_benchmarks/CUDA"
76

87
const can_run = length(devices()) > 0
98

benchmark/KernelAbstractions/benchmark_platform_OpenCL.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
include("implementation/benchmark_platform.jl")
22

3-
using Dates: value, now, UNIXEPOCH
43
import pocl_jll
54
using OpenCL: CLArray, cl.devices, cl.platforms, cl.finish, cl.queue
65
const AT = CLArray
7-
const path = "OpenCL_benchmark_" * string(value(now()) - UNIXEPOCH)
6+
const path = "QuantumClifford_benchmarks/OpenCL"
87

98
const can_run = any(length(devices(platform)) > 0 for platform in platforms())
109

benchmark/KernelAbstractions/benchmark_platform_ROCm.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
include("implementation/benchmark_platform.jl")
22

3-
using Dates: value, now, UNIXEPOCH
43
using AMDGPU: ROCArray, devices, synchronize
54
const AT = ROCArray
6-
const path = "ROCm_benchmark_" * string(value(now()) - UNIXEPOCH)
5+
const path = "QuantumClifford_benchmarks/ROCm"
76

87
const can_run = length(devices()) > 0
98

benchmark/KernelAbstractions/implementation/benchmark_KA_mul_leftright.jl

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,35 @@
11
# This must be done explicitly as they are not exported.
22
using QuantumClifford: mul_left!, mul_right!, Tableau
33

4-
@inline host_f(x, y; phases::Val{phase_B} = Val(true)) where {phase_B} =
4+
@inline function host_f!(
5+
x, y;
6+
phases::Val{phase_B} = Val(default_phases)
7+
) where {phase_B}
8+
59
mul_left!(x, y; phases = phases)
610

7-
@inline function device_f(
11+
end
12+
13+
@inline function device_f!(
814
x, y, synchronize;
9-
phases::Val{phase_B} = Val(true),
15+
phases::Val{phase_B} = Val(default_phases),
16+
primary_axis::Val{primary_axis_E} = Val(default_primary_axis),
1017
block_size::Val{block_SZ} = Val(default_block_size),
1118
batch_size::Val{batch_SZ} = Val(default_batch_size)
12-
) where {phase_B, block_SZ, batch_SZ}
19+
) where {phase_B, primary_axis_E, block_SZ, batch_SZ}
1320

1421
mul_left!(
1522
x, y;
16-
phases = phases, block_size = block_size, batch_size = batch_size
23+
phases = phases, primary_axis = primary_axis,
24+
block_size = block_size, batch_size = batch_size
1725
)
1826
synchronize()
1927

2028
end
2129

2230
@inline function benchmark_KA_mul_pauli_pauli(
2331
AT, synchronize, path;
24-
phases::Val{phase_B} = Val(true)
32+
phases::Val{phase_B} = Val(default_phases)
2533
) where {phase_B}
2634

2735
host_time = zeros(Float64, length(n_MiB))
@@ -46,17 +54,17 @@ end
4654
d_p2 = copy(d_p1)
4755
synchronize()
4856
# Trigger compilation before benchmarking.
49-
host_f(h_p1, h_p2; phases = phases)
50-
host_time[i] = @belapsed host_f(
57+
host_f!(h_p1, h_p2; phases = phases)
58+
host_time[i] = @belapsed host_f!(
5159
$h_p1, $h_p2; phases = $phases
5260
) evals = evals samples = samples seconds = seconds
5361
for (j, size) in enumerate(batch_sizes)
54-
device_f(
62+
device_f!(
5563
d_p1, d_p2, synchronize;
5664
phases = phases, batch_size = Val(size)
5765
)
5866
device_time[j, i] =
59-
@belapsed device_f(
67+
@belapsed device_f!(
6068
$d_p1, $d_p2, $synchronize;
6169
phases = $phases, batch_size = Val($size)
6270
) evals = evals samples = samples seconds = seconds
@@ -72,28 +80,30 @@ end
7280
string(Sys.CPU_THREADS) * ", Device block size = $default_block_size"
7381
xlabel = "Pauli operator size (MiB)"
7482
label = hcat(("Device - batch size = " .* string.(batch_sizes))..., "Host")
83+
path *= "/pauli_pauli"
84+
mkpath(path)
7585

7686
plot(
7787
n_MiB, 10^3 .* hcat(device_cat..., host_time);
7888
shape = :circle, xticks = n_MiB, xscale = :log2, yscale = :log10,
7989
title = title, label = label, xlabel = xlabel, ylabel = "Runtime (ms)",
8090
background_color = :transparent
8191
)
82-
savefig("$path/runtime_pauli_pauli_phase_$phase_B.$format")
92+
savefig("$path/runtime.$format")
8393

8494
plot(
8595
n_MiB, map(x -> host_time ./ x, device_cat);
8696
shape = :circle, xticks = n_MiB, xscale = :log2, title = title,
8797
label = hcat(label[1 : end - 1]...), xlabel = xlabel,
8898
ylabel = "Ratio (host/device)", background_color = :transparent
8999
)
90-
savefig("$path/ratio_pauli_pauli_phase_$phase_B.$format")
100+
savefig("$path/ratio.$format")
91101

92102
end
93103

94104
@inline function benchmark_KA_mul_tableau_pauli(
95105
AT, synchronize, path;
96-
phases::Val{phase_B} = Val(true)
106+
phases::Val{phase_B} = Val(default_phases)
97107
) where {phase_B}
98108

99109
host_time = zeros(Float64, length(n_MiB))
@@ -126,17 +136,17 @@ end
126136
)
127137
synchronize()
128138
# Trigger compilation before benchmarking.
129-
host_f(h_t, h_p; phases = phases)
130-
host_time[i] = @belapsed host_f(
139+
host_f!(h_t, h_p; phases = phases)
140+
host_time[i] = @belapsed host_f!(
131141
$h_t, $h_p; phases = $phases
132142
) evals = evals samples = samples seconds = seconds
133143
for (j, size) in enumerate(batch_sizes)
134-
device_f(
144+
device_f!(
135145
d_t, d_p, synchronize;
136146
phases = phases, batch_size = Val(size)
137147
)
138148
device_time[j, i] =
139-
@belapsed device_f(
149+
@belapsed device_f!(
140150
$d_t, $d_p, $synchronize;
141151
phases = $phases, batch_size = Val($size)
142152
) evals = evals samples = samples seconds = seconds
@@ -152,32 +162,33 @@ end
152162
string(Sys.CPU_THREADS) * ", Device block size = $default_block_size"
153163
xlabel = "Tableau size (MiB)"
154164
label = hcat(("Device - batch size = " .* string.(batch_sizes))..., "Host")
165+
path *= "/tableau_pauli"
166+
mkpath(path)
155167

156168
plot(
157169
n_MiB, 10^3 .* hcat(device_cat..., host_time);
158170
shape = :circle, xticks = n_MiB, xscale = :log2, yscale = :log10,
159171
title = title, label = label, xlabel = xlabel, ylabel = "Runtime (ms)",
160172
background_color = :transparent
161173
)
162-
savefig("$path/runtime_tableau_pauli_phase_$phase_B.$format")
174+
savefig("$path/runtime.$format")
163175

164176
plot(
165177
n_MiB, map(x -> host_time ./ x, device_cat);
166178
shape = :circle, xticks = n_MiB, xscale = :log2, title = title,
167179
label = hcat(label[1 : end - 1]...), xlabel = xlabel,
168180
ylabel = "Ratio (host/device)", background_color = :transparent
169181
)
170-
savefig("$path/ratio_tableau_pauli_phase_$phase_B.$format")
182+
savefig("$path/ratio.$format")
171183

172184
end
173185

174186
@inline function benchmark_KA_mul_leftright(
175187
AT, synchronize, path;
176-
phases::Val{phase_B} = Val(true)
188+
phases::Val{phase_B} = Val(default_phases)
177189
) where {phase_B}
178190

179-
path = "$path/mul_leftright"
180-
mkpath(path)
191+
path *= "/mul_leftright/phase_$phase_B"
181192
benchmark_KA_mul_pauli_pauli(AT, synchronize, path; phases = phases)
182193
benchmark_KA_mul_tableau_pauli(AT, synchronize, path; phases = phases)
183194

benchmark/KernelAbstractions/implementation/benchmark_platform.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ include("utilities.jl")
44
include("benchmark_KA_mul_leftright.jl")
55

66
@inline function benchmark_platform(AT, synchronize, path)
7+
path *= "/" * string(value(now()) - UNIXEPOCH)
78
benchmark_KA_mul_leftright(AT, synchronize, path; phases = Val(true))
89
benchmark_KA_mul_leftright(AT, synchronize, path; phases = Val(false))
910
end

benchmark/KernelAbstractions/implementation/definitions.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ const n_MiB = [2^i for i = 1:10]
1616
# TODO: Keep these or remove them now that a good default has been set?
1717
const batch_sizes = [1, 4, 8, 16, 32, 64]
1818

19-
# These values are inaccessible since they originate from a package extension.
20-
const default_block_size = 256
21-
const default_batch_size = 32
19+
# These values originate from a package extension, hence the query.
20+
const KAExt = Base.get_extension(QuantumClifford, :QuantumCliffordKAExt)
21+
const default_phases = KAExt.default_phases
22+
const default_primary_axis = KAExt.default_primary_axis
23+
const default_block_size = KAExt.default_block_size
24+
const default_batch_size = KAExt.default_batch_size

benchmark/KernelAbstractions/implementation/imports.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import Atomix, GPUArraysCore, KernelAbstractions
33

44
using BenchmarkTools: @belapsed
5+
using Dates: value, now, UNIXEPOCH
56
# Assists in reducing resource demands.
67
using GPUArrays: AllocCache, @cached, unsafe_free!
78
using Plots: plot, savefig
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Works even when broadcasting on zero-dimensional arrays.
2-
@inline u32(v) = map(x -> UInt32(x), v)
2+
@inline function u32(v)
3+
return map(x -> UInt32(x), v)
4+
end
35

46
# By definition, the size of (unsigned) char is set to unity.
5-
@inline bit_count(::Type{T}) where {T} = sizeof(T) * count_zeros(zero(Cuchar))
7+
@inline function bit_count(::Type{T}) where {T}
8+
return sizeof(T) * count_zeros(zero(Cuchar))
9+
end

ext/QuantumCliffordKAExt/QuantumCliffordKAExt.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ include("definitions.jl")
77
include("../../src/throws.jl")
88
include("utilities.jl")
99
include("mul_leftright.jl")
10+
include("canonicalization.jl")
1011

1112
end
1213
#=============================================================================#

0 commit comments

Comments
 (0)