Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmark/KernelAbstractions/README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Usage Instructions

1. Modify the parameters listed in `implementation/definitions.jl` as desired, keeping in mind the limitations of the available device memory and the extent of the benchmark runtime.
1. Modify the values listed in `implementation/definitions/[benchmark_configuration, tuning_parameters].jl` as desired, keeping in mind the limitations of the available device memory and the extent of the benchmark runtime.
2. Ensure that all the packages listed in `implementation/imports.jl` are installed as this is not handled automatically.
3. Ensure that the backend package(s) listed in the pertinent `benchmark_platform_*.jl` script are properly set up and configured.
4. Pass said script as an argument to julia (optionally, also set the number of executing host threads) and wait for the benchmark to conclude.
5. Navigate to the newly created directory in order to inspect the results.
5. Navigate to the newly created directory (hierarchy) and find the matching platform/timestamp pair in order to inspect the results.

# Noteworthy Details

Expand Down
8 changes: 5 additions & 3 deletions benchmark/KernelAbstractions/benchmark_platform_CUDA.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@

#=============================================================================#
include("implementation/benchmark_platform.jl")

using Dates: value, now, UNIXEPOCH
using CUDA: CuArray, devices, synchronize
const AT = CuArray
const path = "CUDA_benchmark_" * string(value(now()) - UNIXEPOCH)
const path = "benchmarks/QuantumCliffordKAExt/CUDA"

const can_run = length(devices()) > 0

if can_run
benchmark_platform(AT, synchronize, path)
benchmark_platform(synchronize, AT, path)
else
@info "Unable to run CUDA benchmark. No suitable device was found."
end
#=============================================================================#
8 changes: 5 additions & 3 deletions benchmark/KernelAbstractions/benchmark_platform_OpenCL.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@

#=============================================================================#
include("implementation/benchmark_platform.jl")

using Dates: value, now, UNIXEPOCH
import pocl_jll
using OpenCL: CLArray, cl.devices, cl.platforms, cl.finish, cl.queue
const AT = CLArray
const path = "OpenCL_benchmark_" * string(value(now()) - UNIXEPOCH)
const path = "benchmarks/QuantumCliffordKAExt/OpenCL"

const can_run = any(length(devices(platform)) > 0 for platform in platforms())

if can_run
synchronize() = finish(queue())
benchmark_platform(AT, synchronize, path)
benchmark_platform(synchronize, AT, path)
else
@info "Unable to run OpenCL benchmark. No suitable device was found."
end
#=============================================================================#
8 changes: 5 additions & 3 deletions benchmark/KernelAbstractions/benchmark_platform_ROCm.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@

#=============================================================================#
include("implementation/benchmark_platform.jl")

using Dates: value, now, UNIXEPOCH
using AMDGPU: ROCArray, devices, synchronize
const AT = ROCArray
const path = "ROCm_benchmark_" * string(value(now()) - UNIXEPOCH)
const path = "benchmarks/QuantumCliffordKAExt/ROCm"

const can_run = length(devices()) > 0

if can_run
benchmark_platform(AT, synchronize, path)
benchmark_platform(synchronize, AT, path)
else
@info "Unable to run ROCm benchmark. No suitable device was found."
end
#=============================================================================#

This file was deleted.

19 changes: 15 additions & 4 deletions benchmark/KernelAbstractions/implementation/benchmark_platform.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@

#=============================================================================#
include("imports.jl")
include("definitions.jl")
include("utilities.jl")
include("benchmark_KA_mul_leftright.jl")

@inline function benchmark_platform(AT, synchronize, path)
benchmark_KA_mul_leftright(AT, synchronize, path; phases = Val(true))
benchmark_KA_mul_leftright(AT, synchronize, path; phases = Val(false))
include("suites/benchmark_KA_mul.jl")
include("suites/benchmark_KA_canonicalization.jl")

# Run every benchmark suite for one platform.
#
# Arguments, all forwarded unchanged to each suite:
# - `synchronize`: passed through to the suites as their first argument.
# - `AT`: passed through to the suites as their second argument.
# - `path`: base output path; a per-run suffix is appended before forwarding.
#
# Always returns `nothing`.
@inline function benchmark_platform(synchronize, AT, path)::Nothing
    # The cache has to be a global because BenchmarkTools evaluates the setup
    # block at the global scope.
    global cache
    cache = AllocCache()

    # Append the offset of the current time from UNIXEPOCH to the base path.
    timestamp = value(now()) - UNIXEPOCH
    output_path = path * '/' * string(timestamp)

    benchmark_KA_mul(synchronize, AT, output_path)
    benchmark_KA_canonicalization(synchronize, AT, output_path)

    return nothing
end
#=============================================================================#
25 changes: 5 additions & 20 deletions benchmark/KernelAbstractions/implementation/definitions.jl
Original file line number Diff line number Diff line change
@@ -1,21 +1,6 @@
# (La)TeX hates SVG but the Plots package has issues with transparent PDFs.
const format = "svg"

# BenchmarkTools parameters.
# Evaluations per sample point.
const evals = 16
# Maximum number of samples.
const samples = 2^10
# Maximum runtime for each sample group.
const seconds = 60

# By definition, (unsigned) char is the smallest addressable unit of memory.
const MiB = 1024 * 1024 * count_zeros(zero(Cuchar))
# Avoid consuming too many resources, 1 GiB is plenty.
const n_MiB = [2^i for i = 1:10]
# TODO: Keep these or remove them now that a good default has been set?
const batch_sizes = [1, 4, 8, 16, 32, 64]

# These values are inaccessible since they originate from a package extension.
const default_block_size = 256
const default_batch_size = 32
#=============================================================================#
include("definitions/benchmark_configuration.jl")
include("definitions/plot_configuration.jl")
include("definitions/tuning_parameters.jl")
#=============================================================================#
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@

#=============================================================================#

#==============================================================================
BENCHMARK TOOLS
==============================================================================#

# Number of evaluations per sample point.
# CAUTION: The functions mutate their arguments, which induces disparity
# between runs.
const evals = 1

# Upper bound on the number of sample points.
const samples = 1 << 14

# Upper bound on the runtime of each trial.
const seconds = 60

#==============================================================================
SAMPLE EXTRAPOLATION
==============================================================================#

# Maximum sampling period before a run is aborted and extrapolated instead.
# Set to twice the per-trial runtime limit.
const extrapolation_threshold = 2 * seconds

# Whether the aborted run is included in the data set used for extrapolation.
const include_threshold_point = false

# When there are too few data points for a fit, fall back to O(n^k) scaling.
const host_permit_simple_scaling = true
const device_permit_simple_scaling = true

#==============================================================================
PROBLEM SIZE
==============================================================================#

# Number of bits in one MiB. By definition, (unsigned) char is the smallest
# addressable unit of memory, hence the size is derived from Cuchar.
const MiB = 2^20 * 8 * sizeof(Cuchar)

# Powers of two from 2 through 1024; 1 GiB is plenty and avoids consuming too
# many resources.
const sizes_MiB = 2 .^ (1:10)

#==============================================================================
TUNING PARAMETERS
==============================================================================#

const benchmark_primary_axis = true

const benchmark_phases = true

# TODO: Enable this by default once the POCL code generation bugs are fixed.
const benchmark_block_size = false

const benchmark_batch_size = true
#=============================================================================#
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

#=============================================================================#
# Shared plot attributes; the aim is output that is both stylish and
# informative. The x axis is ticked at the benchmarked sizes on a log2 scale.
const plot_style = Dict{Symbol, Any}(
    :xticks => sizes_MiB,
    :xscale => :log2,
    :shape => :circle,
    :background_color => :transparent,
)

# SVG is used even though (La)TeX hates it, because the Plots package has
# issues with transparent PDFs.
const file_format = "svg"
#=============================================================================#
Loading
Loading