diff --git a/devops/scripts/benchmarks/CONTRIB.md b/devops/scripts/benchmarks/CONTRIB.md index fb1964dad7fe8..3ddc5e9bc1d72 100644 --- a/devops/scripts/benchmarks/CONTRIB.md +++ b/devops/scripts/benchmarks/CONTRIB.md @@ -18,11 +18,10 @@ The suite is structured around four main components: Suites, Benchmarks, Results * Represent a single benchmark, usually mapping to a binary execution. * Must implement the `Benchmark` base class (`benches/base.py`). * **Required Methods:** - * `setup()`: Initializes the benchmark (e.g., build, download data). Use `self.download()` for data dependencies. **Do not** perform setup in `__init__`. * `run(env_vars)`: Executes the benchmark binary (use `self.run_bench()`) and returns a list of `Result` objects. Can be called multiple times, must produce consistent results. - * `teardown()`: Cleans up resources. Can be empty. No need to remove build artifacts or downloaded datasets. * `name()`: Returns a unique identifier string for the benchmark across *all* suites. If a benchmark class is instantiated multiple times with different parameters (e.g., "Submit In Order", "Submit Out Of Order"), the `name()` must reflect this uniqueness. * **Optional Methods:** + * `setup()`: Initializes the benchmark (e.g., build, download data). Use `self.download()` for data dependencies. **Do not** perform setup in `__init__`. * `lower_is_better()`: Returns `True` if lower result values are better (default: `True`). * `description()`: Provides a short description about the benchmark. * `notes()`: Provides additional commentary about the benchmark results (string). @@ -163,9 +162,49 @@ The benchmark suite generates an interactive HTML dashboard that visualizes `Res **Stability:** * Mark unstable benchmarks with `metadata.unstable` to hide them by default. +## Code Style Guidelines + +### Benchmark Class Structure + +When creating benchmark classes, follow this consistent structure pattern: + +**1. Constructor (`__init__`):** +* Assign all parameters to protected (prefixed with `_`) or private (prefixed with `__`) instance variables. +* Set `self._iterations_regular` and `self._iterations_trace` BEFORE calling `super().__init__()` (required for subclasses of `ComputeBenchmark`). + +**2. Method Order:** +* Keep methods in the same order as in the abstract base class `Benchmark`. Not all of them are required, but follow that order for consistency. +* Public methods first, then protected, then private. + +### Naming Conventions + +**Method Return Values:** +* `name()`: Unique identifier with underscores, lowercase, includes all distinguishing parameters + * Example: `"api_overhead_benchmark_sycl SubmitKernel in order with measure completion"` +* `display_name()`: User-friendly, uses proper capitalization, commas for readability, used for chart titles + * Example: `"SYCL SubmitKernel in order, with measure completion, NumKernels 10"` + +**Method and variable names should follow PEP 8 guidelines.** +* Use lowercase with underscores for method names and variables. +* Use a single underscore prefix for protected variables/methods and a double underscore prefix for private variables/methods.
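+
+A minimal sketch of how these rules fit together for a `ComputeBenchmark` subclass is shown below. The class name `ExampleKernelBench`, the `"ExampleKernel"` test name, and the `--size` flag are hypothetical, used for illustration only; the structure mirrors the existing benchmarks in `benches/compute.py`.
+
+```python
+class ExampleKernelBench(ComputeBenchmark):
+    def __init__(self, bench, runtime: RUNTIMES, size):
+        # Protected instance variables, assigned from constructor parameters.
+        self._size = size
+        # Mandatory iteration counts, set BEFORE calling super().__init__().
+        self._iterations_regular = 10000
+        self._iterations_trace = 10
+        super().__init__(bench, f"example_benchmark_{runtime.value}", "ExampleKernel", runtime)
+
+    def name(self):
+        # Unique across all suites: lowercase with underscores, all distinguishing parameters included.
+        return f"example_benchmark_{self._runtime.value} ExampleKernel size:{self._size}"
+
+    def display_name(self) -> str:
+        # User-friendly form used for chart titles.
+        return f"{self._runtime.value.upper()} ExampleKernel, size {self._size}"
+
+    def description(self) -> str:
+        return f"Measures the time to submit a single hypothetical kernel operating on {self._size} bytes."
+
+    def get_tags(self):
+        return ["submit", "latency", runtime_to_tag_name(self._runtime), "micro"]
+
+    # Protected methods follow the public ones.
+    def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
+        iters = self._get_iters(run_trace)
+        return [f"--iterations={iters}", f"--size={self._size}"]
+```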
+ +### Description Writing + +Descriptions should: +* Clearly state what is being measured +* Include key parameters and their values +* Explain the purpose or what the benchmark tests +* Be 1-3 sentences, clear and concise +* If not needed, can be omitted + +### Tag Selection + +* Use predefined tags from `benches/base.py` when available +* Tags should be lowercase, descriptive, single words + ## Adding New Benchmarks -1. **Create Benchmark Class:** Implement a new class inheriting from `benches.base.Benchmark`. Implement required methods (`setup`, `run`, `teardown`, `name`) and optional ones (`description`, `get_tags`, etc.) as needed. +1. **Create Benchmark Class:** Implement a new class inheriting from `benches.base.Benchmark`. Implement required methods (`run`, `name`) and optional ones (`description`, `get_tags`, etc.) as needed. Follow the code style guidelines above. 2. **Add to Suite:** * If adding to an existing category, modify the corresponding `Suite` class (e.g., `benches/compute.py`) to instantiate and return your new benchmark in its `benchmarks()` method. * If creating a new category, create a new `Suite` class inheriting from `benches.base.Suite`. Implement `name()` and `benchmarks()`. Add necessary `setup()` if the suite requires shared setup. Add group metadata via `additional_metadata()` if needed. diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index bbbedaf629bf5..e5b29633496b7 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -56,6 +56,28 @@ def __init__(self, suite): def name(self) -> str: pass + @abstractmethod + def run( + self, + env_vars, + run_trace: TracingType = TracingType.NONE, + force_trace: bool = False, + ) -> list[Result]: + """Execute the benchmark with the given environment variables. + + Args: + env_vars: Environment variables to use when running the benchmark. + run_trace: The type of tracing to run (NONE, UNITRACE, or FLAMEGRAPH). + force_trace: If True, ignore the traceable() method and force tracing. + + Returns: + A list of Result objects with the benchmark results. + + Raises: + Exception: If the benchmark fails for any reason. + """ + pass + def display_name(self) -> str: """Returns a user-friendly name for display in charts. By default returns the same as name(), but can be overridden. @@ -87,44 +109,6 @@ def setup(self): """Extra setup steps to be performed before running the benchmark.""" pass - @abstractmethod - def teardown(self): - pass - - @abstractmethod - def run( - self, - env_vars, - run_trace: TracingType = TracingType.NONE, - force_trace: bool = False, - ) -> list[Result]: - """Execute the benchmark with the given environment variables. - - Args: - env_vars: Environment variables to use when running the benchmark. - run_trace: The type of tracing to run (NONE, UNITRACE, or FLAMEGRAPH). - force_trace: If True, ignore the traceable() method and force tracing. - - Returns: - A list of Result objects with the benchmark results. - - Raises: - Exception: If the benchmark fails for any reason. 
- """ - pass - - @staticmethod - def get_adapter_full_path(): - for libs_dir_name in ["lib", "lib64"]: - adapter_path = os.path.join( - options.ur, libs_dir_name, f"libur_adapter_{options.ur_adapter}.so" - ) - if os.path.isfile(adapter_path): - return adapter_path - assert ( - False - ), f"could not find adapter file {adapter_path} (and in similar lib paths)" - def run_bench( self, command, @@ -268,6 +252,18 @@ def get_metadata(self) -> dict[str, BenchmarkMetadata]: ) } + @staticmethod + def get_adapter_full_path(): + for libs_dir_name in ["lib", "lib64"]: + adapter_path = os.path.join( + options.ur, libs_dir_name, f"libur_adapter_{options.ur_adapter}.so" + ) + if os.path.isfile(adapter_path): + return adapter_path + assert ( + False + ), f"could not find adapter file {adapter_path} (and in similar lib paths)" + class Suite(ABC): @abstractmethod diff --git a/devops/scripts/benchmarks/benches/benchdnn.py b/devops/scripts/benchmarks/benches/benchdnn.py index 66ea6733e852d..198b8048e992d 100644 --- a/devops/scripts/benchmarks/benches/benchdnn.py +++ b/devops/scripts/benchmarks/benches/benchdnn.py @@ -94,9 +94,6 @@ def setup(self) -> None: timeout=60 * 20, ) - def teardown(self): - pass - class OneDnnBenchmark(Benchmark): def __init__(self, suite, bench_driver, bench_name, bench_args, syclgraph=True): @@ -210,6 +207,3 @@ def _extract_time(self, output): if values: return sum(values) return 0.0 - - def teardown(self): - pass diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index f59f2e9b9bf83..ea77a7b35192d 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -52,8 +52,8 @@ def runtime_to_tag_name(runtime: RUNTIMES) -> str: class ComputeBench(Suite): def __init__(self): - self.submit_graph_num_kernels = [4, 10, 32] - self.project = None + self._submit_graph_num_kernels = [4, 10, 32] + self._project = None def name(self) -> str: return "Compute Benchmarks" @@ -69,8 +69,8 @@ def setup(self) -> None: if options.sycl is None: return - if self.project is None: - self.project = GitProject( + if self._project is None: + self._project = GitProject( self.git_url(), self.git_hash(), Path(options.workdir), @@ -101,8 +101,8 @@ def setup(self) -> None: f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", ] - self.project.configure(extra_args, add_sycl=True) - self.project.build(add_sycl=True) + self._project.configure(extra_args, add_sycl=True) + self._project.build(add_sycl=True) def additional_metadata(self) -> dict[str, BenchmarkMetadata]: """ @@ -197,7 +197,7 @@ def benchmarks(self) -> list[Benchmark]: submit_graph_params = product( list(RUNTIMES), [0, 1], # in_order_queue - self.submit_graph_num_kernels, + self._submit_graph_num_kernels, [0, 1], # measure_completion_time [0, 1], # use_events ) @@ -317,94 +317,22 @@ def __init__( profiler_type: PROFILERS = PROFILERS.TIMER, ): super().__init__(suite) - self.suite = suite - self.bench_name = name - self.test = test - self.runtime = runtime - self.profiler_type = profiler_type + self._suite = suite + self._bench_name = name + self._test = test + self._runtime = runtime + self._profiler_type = profiler_type # Mandatory per-benchmark iteration counts. - # Subclasses MUST set both `self.iterations_regular` and - # `self.iterations_trace` (positive ints) in their __init__ before + # Subclasses MUST set both `self._iterations_regular` and + # `self._iterations_trace` (positive ints) in their __init__ before # calling super().__init__(). 
The base class enforces this. - self._validate_attr("iterations_regular") - self._validate_attr("iterations_trace") - - @property - def benchmark_bin(self) -> Path: - """Returns the path to the benchmark binary""" - return self.suite.project.build_dir / "bin" / self.bench_name - - def cpu_count_str(self, separator: str = "") -> str: - # Note: SYCL CI currently relies on this "CPU count" value. - # Please update /devops/scripts/benchmarks/compare.py if this value - # is changed. See compare.py usage (w.r.t. --regression-filter) in - # /devops/actions/run-tests/benchmarks/action.yml. - return ( - f"{separator} CPU count" - if self.profiler_type == PROFILERS.CPU_COUNTER - else "" - ) - - def get_iters(self, run_trace: TracingType): - """Returns the number of iterations to run for the given tracing type.""" - if options.exit_on_failure: - # we are just testing that the benchmark runs successfully - return 3 - if run_trace == TracingType.NONE: - return self.iterations_regular - return self.iterations_trace - - def supported_runtimes(self) -> list[RUNTIMES]: - """Base runtimes supported by this benchmark, can be overridden.""" - # By default, support all runtimes except SYCL_PREVIEW - return [r for r in RUNTIMES if r != RUNTIMES.SYCL_PREVIEW] - - def enabled_runtimes(self) -> list[RUNTIMES]: - """Runtimes available given the current configuration.""" - # Start with all supported runtimes and apply configuration filters - runtimes = self.supported_runtimes() - - # Remove UR if not available - if options.ur is None: - runtimes = [r for r in runtimes if r != RUNTIMES.UR] - - # Remove Level Zero if using CUDA backend - if options.ur_adapter == "cuda": - runtimes = [r for r in runtimes if r != RUNTIMES.LEVEL_ZERO] - - return runtimes - - def enabled(self) -> bool: - # SYCL is required for all benchmarks - if options.sycl is None: - return False - - # HIP adapter is not supported - if options.ur_adapter == "hip": - return False - - # Check if the specific runtime is enabled (or no specific runtime required) - return self.runtime is None or self.runtime in self.enabled_runtimes() + self.__validate_attr("_iterations_regular") + self.__validate_attr("_iterations_trace") def name(self): """Returns the name of the benchmark, can be overridden.""" - return self.bench_name - - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - # Subclasses must implement this and include all flags except --iterations; - # the base `run()` will prepend the proper --iterations value based on - # `run_trace` and the subclass's `iterations_regular`/`iterations_trace`. - return [] - - def extra_env_vars(self) -> dict: - return {} - - def explicit_group(self): - return "" - - def description(self) -> str: - return "" + return self._bench_name def run( self, @@ -413,24 +341,24 @@ def run( force_trace: bool = False, ) -> list[Result]: command = [ - str(self.benchmark_bin), - f"--test={self.test}", + str(self.__benchmark_bin), + f"--test={self._test}", "--csv", "--noHeaders", ] # Let subclass provide remaining args; bin_args(run_trace) must # include the proper --iterations token computed from this class's # iteration fields. 
- command += self.bin_args(run_trace) - env_vars.update(self.extra_env_vars()) + command += self._bin_args(run_trace) + env_vars.update(self._extra_env_vars()) result = self.run_bench( command, env_vars, run_trace=run_trace, force_trace=force_trace ) - parsed_results = self.parse_output(result) + parsed_results = self.__parse_output(result) ret = [] for median, stddev in parsed_results: - unit = "instr" if self.profiler_type == PROFILERS.CPU_COUNTER else "μs" + unit = "instr" if self._profiler_type == PROFILERS.CPU_COUNTER else "μs" ret.append( Result( label=self.name(), @@ -439,13 +367,85 @@ def run( command=command, env=env_vars, unit=unit, - git_url=self.suite.git_url(), - git_hash=self.suite.git_hash(), + git_url=self._suite.git_url(), + git_hash=self._suite.git_hash(), ) ) return ret - def parse_output(self, output: str) -> list[tuple[float, float]]: + def explicit_group(self): + return "" + + def enabled(self) -> bool: + # SYCL is required for all benchmarks + if options.sycl is None: + return False + + # HIP adapter is not supported + if options.ur_adapter == "hip": + return False + + # Check if the specific runtime is enabled (or no specific runtime required) + return self._runtime is None or self._runtime in self.__enabled_runtimes() + + def description(self) -> str: + return "" + + def _cpu_count_str(self, separator: str = "") -> str: + # Note: SYCL CI currently relies on this "CPU count" value. + # Please update /devops/scripts/benchmarks/compare.py if this value + # is changed. See compare.py usage (w.r.t. --regression-filter) in + # /devops/actions/run-tests/benchmarks/action.yml. + return ( + f"{separator} CPU count" + if self._profiler_type == PROFILERS.CPU_COUNTER + else "" + ) + + def _get_iters(self, run_trace: TracingType): + """Returns the number of iterations to run for the given tracing type.""" + if options.exit_on_failure: + # we are just testing that the benchmark runs successfully + return 3 + if run_trace == TracingType.NONE: + return self._iterations_regular + return self._iterations_trace + + def _supported_runtimes(self) -> list[RUNTIMES]: + """Base runtimes supported by this benchmark, can be overridden.""" + # By default, support all runtimes except SYCL_PREVIEW + return [r for r in RUNTIMES if r != RUNTIMES.SYCL_PREVIEW] + + def _extra_env_vars(self) -> dict: + return {} + + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + # Subclasses must implement this and include all flags except --iterations; + # the base `run()` will prepend the proper --iterations value based on + # `run_trace` and the subclass's `iterations_regular`/`iterations_trace`. 
+ return [] + + @property + def __benchmark_bin(self) -> Path: + """Returns the path to the benchmark binary""" + return self._suite._project.build_dir / "bin" / self._bench_name + + def __enabled_runtimes(self) -> list[RUNTIMES]: + """Runtimes available given the current configuration.""" + # Start with all supported runtimes and apply configuration filters + runtimes = self._supported_runtimes() + + # Remove UR if not available + if options.ur is None: + runtimes = [r for r in runtimes if r != RUNTIMES.UR] + + # Remove Level Zero if using CUDA backend + if options.ur_adapter == "cuda": + runtimes = [r for r in runtimes if r != RUNTIMES.LEVEL_ZERO] + + return runtimes + + def __parse_output(self, output: str) -> list[tuple[float, float]]: csv_file = io.StringIO(output) reader = csv.reader(csv_file) next(reader, None) @@ -469,17 +469,14 @@ def parse_output(self, output: str) -> list[tuple[float, float]]: raise ValueError("Benchmark output does not contain data.") return results - def teardown(self): - return - - def _validate_attr(self, attr_name: str): + def __validate_attr(self, attr_name: str): if ( not hasattr(self, attr_name) or not isinstance(getattr(self, attr_name, None), int) or getattr(self, attr_name, 0) <= 0 ): raise ValueError( - f"{self.bench_name}: subclasses must set `{attr_name}` (positive int) before calling super().__init__" + f"{self._bench_name}: subclasses must set `{attr_name}` (positive int) before calling super().__init__" ) @@ -494,14 +491,14 @@ def __init__( KernelExecTime=1, profiler_type=PROFILERS.TIMER, ): - self.ioq = ioq - self.MeasureCompletion = MeasureCompletion - self.UseEvents = UseEvents - self.KernelExecTime = KernelExecTime - self.NumKernels = 10 + self._ioq = ioq + self._measure_completion = MeasureCompletion + self._use_events = UseEvents + self._kernel_exec_time = KernelExecTime + self._num_kernels = 10 # iterations set per existing bin_args: --iterations=100000 - self.iterations_regular = 100000 - self.iterations_trace = 10 + self._iterations_regular = 100000 + self._iterations_trace = 10 super().__init__( bench, f"api_overhead_benchmark_{runtime.value}", @@ -510,87 +507,89 @@ def __init__( profiler_type, ) - def supported_runtimes(self) -> list[RUNTIMES]: - return super().supported_runtimes() + [RUNTIMES.SYCL_PREVIEW] - - def enabled(self) -> bool: - # This is a workaround for the BMG server where we have old results for self.KernelExecTime=20 - # The benchmark instance gets created just to make metadata for these old results - if not super().enabled(): - return False - - device_arch = getattr(options, "device_architecture", "") - if "bmg" in device_arch and self.KernelExecTime == 20: - # Disable this benchmark for BMG server, just create metadata - return False - if "bmg" not in device_arch and self.KernelExecTime == 200: - # Disable KernelExecTime=200 for non-BMG systems, just create metadata - return False - return True - - def get_tags(self): - return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"] - def name(self): - order = "in order" if self.ioq else "out of order" - completion_str = " with measure completion" if self.MeasureCompletion else "" + order = "in order" if self._ioq else "out of order" + completion_str = " with measure completion" if self._measure_completion else "" # this needs to be inversed (i.e., using events is empty string) # to match the existing already stored results - events_str = " not using events" if not self.UseEvents else "" + events_str = " not using events" if not self._use_events else "" 
kernel_exec_time_str = ( - f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else "" + f" KernelExecTime={self._kernel_exec_time}" + if self._kernel_exec_time != 1 + else "" ) - return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str()}" + return f"api_overhead_benchmark_{self._runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self._cpu_count_str()}" def display_name(self) -> str: - order = "in order" if self.ioq else "out of order" + order = "in order" if self._ioq else "out of order" info = [] - if self.MeasureCompletion: + if self._measure_completion: info.append("with measure completion") - if self.UseEvents: + if self._use_events: info.append("using events") - if self.KernelExecTime != 1: - info.append(f"KernelExecTime={self.KernelExecTime}") + if self._kernel_exec_time != 1: + info.append(f"KernelExecTime={self._kernel_exec_time}") additional_info = f" {' '.join(info)}" if info else "" - return f"{self.runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self.NumKernels}{self.cpu_count_str(separator=',')}" + return f"{self._runtime.value.upper()} SubmitKernel {order}{additional_info}, NumKernels {self._num_kernels}{self._cpu_count_str(separator=',')}" def explicit_group(self): - order = "in order" if self.ioq else "out of order" - completion_str = " with completion" if self.MeasureCompletion else "" - events_str = " using events" if self.UseEvents else "" + order = "in order" if self._ioq else "out of order" + completion_str = " with completion" if self._measure_completion else "" + events_str = " using events" if self._use_events else "" - kernel_exec_time_str = f" long kernel" if self.KernelExecTime != 1 else "" + kernel_exec_time_str = f" long kernel" if self._kernel_exec_time != 1 else "" - return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self.cpu_count_str(separator=',')}" + return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}{self._cpu_count_str(separator=',')}" + + def enabled(self) -> bool: + # This is a workaround for the BMG server where we have old results for self._kernel_exec_time=20 + # The benchmark instance gets created just to make metadata for these old results + if not super().enabled(): + return False + + device_arch = getattr(options, "device_architecture", "") + if "bmg" in device_arch and self._kernel_exec_time == 20: + # Disable this benchmark for BMG server, just create metadata + return False + if "bmg" not in device_arch and self._kernel_exec_time == 200: + # Disable KernelExecTime=200 for non-BMG systems, just create metadata + return False + return True def description(self) -> str: - order = "in-order" if self.ioq else "out-of-order" - runtime_name = runtime_to_name(self.runtime) - completion_desc = f", {'including' if self.MeasureCompletion else 'excluding'} kernel completion time" + order = "in-order" if self._ioq else "out-of-order" + runtime_name = runtime_to_name(self._runtime) + completion_desc = f", {'including' if self._measure_completion else 'excluding'} kernel completion time" return ( f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " - f"Runs {self.NumKernels} simple kernels with minimal execution time to isolate API overhead from kernel execution time." - f"Each kernel executes for approximately {self.KernelExecTime} micro seconds." 
+ f"Runs {self._num_kernels} simple kernels with minimal execution time to isolate API overhead from kernel execution time." + f"Each kernel executes for approximately {self._kernel_exec_time} micro seconds." ) + def get_tags(self): + return ["submit", "latency", runtime_to_tag_name(self._runtime), "micro"] + def range(self) -> tuple[float, float]: return (0.0, None) - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _supported_runtimes(self) -> list[RUNTIMES]: + return super()._supported_runtimes() + [RUNTIMES.SYCL_PREVIEW] + + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--Ioq={self.ioq}", - f"--MeasureCompletion={self.MeasureCompletion}", + f"--Ioq={self._ioq}", + f"--MeasureCompletion={self._measure_completion}", "--Profiling=0", - f"--NumKernels={self.NumKernels}", - f"--KernelExecTime={self.KernelExecTime}", - f"--UseEvents={self.UseEvents}", - f"--profilerType={self.profiler_type.value}", + f"--NumKernels={self._num_kernels}", + f"--KernelExecTime={self._kernel_exec_time}", + f"--UseEvents={self._use_events}", + f"--profilerType={self._profiler_type.value}", ] @@ -598,14 +597,14 @@ class ExecImmediateCopyQueue(ComputeBenchmark): def __init__( self, bench, ioq, isCopyOnly, source, destination, size, profiler_type ): - self.ioq = ioq - self.isCopyOnly = isCopyOnly - self.source = source - self.destination = destination - self.size = size + self._ioq = ioq + self._is_copy_only = isCopyOnly + self._source = source + self._destination = destination + self._size = size # iterations per bin_args: --iterations=100000 - self.iterations_regular = 100000 - self.iterations_trace = 10 + self._iterations_regular = 100000 + self._iterations_trace = 10 super().__init__( bench, "api_overhead_benchmark_sycl", @@ -614,48 +613,48 @@ def __init__( ) def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}" + order = "in order" if self._ioq else "out of order" + return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self._source} to {self._destination}, size {self._size}{self._cpu_count_str()}" def display_name(self) -> str: - order = "in order" if self.ioq else "out of order" - return f"SYCL ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}" + order = "in order" if self._ioq else "out of order" + return f"SYCL ExecImmediateCopyQueue {order} from {self._source} to {self._destination}, size {self._size}{self._cpu_count_str(separator=',')}" def description(self) -> str: - order = "in-order" if self.ioq else "out-of-order" - operation = "copy-only" if self.isCopyOnly else "copy and command submission" + order = "in-order" if self._ioq else "out-of-order" + operation = "copy-only" if self._is_copy_only else "copy and command submission" return ( - f"Measures SYCL {order} queue overhead for {operation} from {self.source} to " - f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads." + f"Measures SYCL {order} queue overhead for {operation} from {self._source} to " + f"{self._destination} memory with {self._size} bytes. Tests immediate execution overheads." 
) def get_tags(self): return ["memory", "submit", "latency", "SYCL", "micro"] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--ioq={self.ioq}", - f"--IsCopyOnly={self.isCopyOnly}", + f"--ioq={self._ioq}", + f"--IsCopyOnly={self._is_copy_only}", "--MeasureCompletionTime=0", - f"--src={self.destination}", - f"--dst={self.destination}", - f"--size={self.size}", + f"--src={self._destination}", + f"--dst={self._destination}", + f"--size={self._size}", "--withCopyOffload=0", - f"--profilerType={self.profiler_type.value}", + f"--profilerType={self._profiler_type.value}", ] class QueueInOrderMemcpy(ComputeBenchmark): def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type): - self.isCopyOnly = isCopyOnly - self.source = source - self.destination = destination - self.size = size + self._is_copy_only = isCopyOnly + self._source = source + self._destination = destination + self._size = size # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__( bench, "memory_benchmark_sycl", @@ -664,109 +663,109 @@ def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type): ) def name(self): - return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}" + return f"memory_benchmark_sycl QueueInOrderMemcpy from {self._source} to {self._destination}, size {self._size}{self._cpu_count_str()}" def display_name(self) -> str: - return f"SYCL QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}" + return f"SYCL QueueInOrderMemcpy from {self._source} to {self._destination}, size {self._size}{self._cpu_count_str(separator=',')}" def description(self) -> str: - operation = "copy-only" if self.isCopyOnly else "copy and command submission" + operation = "copy-only" if self._is_copy_only else "copy and command submission" return ( f"Measures SYCL in-order queue memory copy performance for {operation} from " - f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration." + f"{self._source} to {self._destination} with {self._size} bytes, executed 100 times per iteration." 
) def get_tags(self): return ["memory", "latency", "SYCL", "micro"] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--IsCopyOnly={self.isCopyOnly}", - f"--sourcePlacement={self.source}", - f"--destinationPlacement={self.destination}", - f"--size={self.size}", + f"--IsCopyOnly={self._is_copy_only}", + f"--sourcePlacement={self._source}", + f"--destinationPlacement={self._destination}", + f"--size={self._size}", "--count=100", "--withCopyOffload=0", - f"--profilerType={self.profiler_type.value}", + f"--profilerType={self._profiler_type.value}", ] class QueueMemcpy(ComputeBenchmark): def __init__(self, bench, source, destination, size, profiler_type): - self.source = source - self.destination = destination - self.size = size + self._source = source + self._destination = destination + self._size = size # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__( bench, "memory_benchmark_sycl", "QueueMemcpy", profiler_type=profiler_type ) def name(self): - return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str()}" + return f"memory_benchmark_sycl QueueMemcpy from {self._source} to {self._destination}, size {self._size}{self._cpu_count_str()}" def display_name(self) -> str: - return f"SYCL QueueMemcpy from {self.source} to {self.destination}, size {self.size}{self.cpu_count_str(separator=',')}" + return f"SYCL QueueMemcpy from {self._source} to {self._destination}, size {self._size}{self._cpu_count_str(separator=',')}" def description(self) -> str: return ( - f"Measures general SYCL queue memory copy performance from {self.source} to " - f"{self.destination} with {self.size} bytes per operation." + f"Measures general SYCL queue memory copy performance from {self._source} to " + f"{self._destination} with {self._size} bytes per operation." 
) def get_tags(self): return ["memory", "latency", "SYCL", "micro"] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--sourcePlacement={self.source}", - f"--destinationPlacement={self.destination}", - f"--size={self.size}", - f"--profilerType={self.profiler_type.value}", + f"--sourcePlacement={self._source}", + f"--destinationPlacement={self._destination}", + f"--size={self._size}", + f"--profilerType={self._profiler_type.value}", ] class StreamMemory(ComputeBenchmark): def __init__(self, bench, type, size, placement): - self.type = type - self.size = size - self.placement = placement + self._type = type + self._size = size + self._placement = placement # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__(bench, "memory_benchmark_sycl", "StreamMemory") def name(self): - return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" + return f"memory_benchmark_sycl StreamMemory, placement {self._placement}, type {self._type}, size {self._size}" def display_name(self) -> str: - return f"SYCL StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" - - def description(self) -> str: - return ( - f"Measures {self.placement} memory bandwidth using {self.type} pattern with " - f"{self.size} bytes. Higher values (GB/s) indicate better performance." - ) + return f"SYCL StreamMemory, placement {self._placement}, type {self._type}, size {self._size}" # measurement is in GB/s def lower_is_better(self): return False + def description(self) -> str: + return ( + f"Measures {self._placement} memory bandwidth using {self._type} pattern with " + f"{self._size} bytes. Higher values (GB/s) indicate better performance." 
+ ) + def get_tags(self): return ["memory", "throughput", "SYCL", "micro"] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--type={self.type}", - f"--size={self.size}", - f"--memoryPlacement={self.placement}", + f"--type={self._type}", + f"--size={self._size}", + f"--memoryPlacement={self._placement}", "--useEvents=0", "--contents=Zeros", "--multiplier=1", @@ -778,8 +777,8 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: class VectorSum(ComputeBenchmark): def __init__(self, bench): # iterations per bin_args: --iterations=1000 - self.iterations_regular = 1000 - self.iterations_trace = 10 + self._iterations_regular = 1000 + self._iterations_trace = 10 super().__init__(bench, "miscellaneous_benchmark_sycl", "VectorSum") def name(self): @@ -797,8 +796,8 @@ def description(self) -> str: def get_tags(self): return ["math", "throughput", "SYCL", "micro"] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", "--numberOfElementsX=512", @@ -822,138 +821,138 @@ def __init__( useCopyOffload, useBarrier, ): - self.numOpsPerThread = numOpsPerThread - self.numThreads = numThreads - self.allocSize = allocSize + self._num_ops_per_thread = numOpsPerThread + self._num_threads = numThreads + self._alloc_size = allocSize # preserve provided iterations value - # self.iterations = iterations - self.iterations_regular = iterations - self.iterations_trace = min(iterations, 10) - self.srcUSM = srcUSM - self.dstUSM = dstUSM - self.useEvents = useEvent - self.useCopyOffload = useCopyOffload - self.useBarrier = useBarrier + # self._iterations = iterations + self._iterations_regular = iterations + self._iterations_trace = min(iterations, 10) + self._src_usm = srcUSM + self._dst_usm = dstUSM + self._use_events = useEvent + self._use_copy_offload = useCopyOffload + self._use_barrier = useBarrier super().__init__( bench, f"multithread_benchmark_{runtime.value}", "MemcpyExecute", runtime ) - def extra_env_vars(self) -> dict: - if not self.useCopyOffload: - return {"UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD": "1"} - else: - return {} - def name(self): return ( - f"multithread_benchmark_{self.runtime.value} MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" - + (" without events" if not self.useEvents else "") - + (" without copy offload" if not self.useCopyOffload else "") - + (" with barrier" if self.useBarrier else "") + f"multithread_benchmark_{self._runtime.value} MemcpyExecute opsPerThread:{self._num_ops_per_thread}, numThreads:{self._num_threads}, allocSize:{self._alloc_size} srcUSM:{self._src_usm} dstUSM:{self._dst_usm}" + + (" without events" if not self._use_events else "") + + (" without copy offload" if not self._use_copy_offload else "") + + (" with barrier" if self._use_barrier else "") ) def display_name(self) -> str: info = [] - if not self.useEvents: + if not self._use_events: info.append("without events") - if not self.useCopyOffload: + if not self._use_copy_offload: info.append("without copy offload") additional_info = f", {' '.join(info)}" if info else "" return 
( - f"UR MemcpyExecute, opsPerThread {self.numOpsPerThread}, " - f"numThreads {self.numThreads}, allocSize {self.allocSize}, srcUSM {self.srcUSM}, " - f"dstUSM {self.dstUSM}{additional_info}" + f"UR MemcpyExecute, opsPerThread {self._num_ops_per_thread}, " + f"numThreads {self._num_threads}, allocSize {self._alloc_size}, srcUSM {self._src_usm}, " + f"dstUSM {self._dst_usm}{additional_info}" ) def explicit_group(self): return ( "MemcpyExecute, opsPerThread: " - + str(self.numOpsPerThread) + + str(self._num_ops_per_thread) + ", numThreads: " - + str(self.numThreads) + + str(self._num_threads) + ", allocSize: " - + str(self.allocSize) + + str(self._alloc_size) ) def description(self) -> str: - src_type = "device" if self.srcUSM == 1 else "host" - dst_type = "device" if self.dstUSM == 1 else "host" - events = "with" if self.useEvents else "without" - copy_offload = "with" if self.useCopyOffload else "without" - with_barrier = "with" if self.useBarrier else "without" + src_type = "device" if self._src_usm == 1 else "host" + dst_type = "device" if self._dst_usm == 1 else "host" + events = "with" if self._use_events else "without" + copy_offload = "with" if self._use_copy_offload else "without" + with_barrier = "with" if self._use_barrier else "without" return ( - f"Measures multithreaded memory copy performance with {self.numThreads} threads " - f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes " + f"Measures multithreaded memory copy performance with {self._num_threads} threads " + f"each performing {self._num_ops_per_thread} operations on {self._alloc_size} bytes " f"from {src_type} to {dst_type} memory {events} events {copy_offload} driver copy offload " f"{with_barrier} barrier. " ) def get_tags(self): - return ["memory", "latency", runtime_to_tag_name(self.runtime), "micro"] + return ["memory", "latency", runtime_to_tag_name(self._runtime), "micro"] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _extra_env_vars(self) -> dict: + if not self._use_copy_offload: + return {"UR_L0_V2_FORCE_DISABLE_COPY_OFFLOAD": "1"} + else: + return {} + + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", "--Ioq=1", - f"--UseEvents={self.useEvents}", + f"--UseEvents={self._use_events}", "--MeasureCompletion=1", "--UseQueuePerThread=1", - f"--AllocSize={self.allocSize}", - f"--NumThreads={self.numThreads}", - f"--NumOpsPerThread={self.numOpsPerThread}", - f"--SrcUSM={self.srcUSM}", - f"--DstUSM={self.dstUSM}", - f"--UseBarrier={self.useBarrier}", + f"--AllocSize={self._alloc_size}", + f"--NumThreads={self._num_threads}", + f"--NumOpsPerThread={self._num_ops_per_thread}", + f"--SrcUSM={self._src_usm}", + f"--DstUSM={self._dst_usm}", + f"--UseBarrier={self._use_barrier}", ] class GraphApiSinKernelGraph(ComputeBenchmark): def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): - self.withGraphs = withGraphs - self.numKernels = numKernels + self._with_graphs = withGraphs + self._num_kernels = numKernels # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__( bench, f"graph_api_benchmark_{runtime.value}", "SinKernelGraph", runtime ) + def name(self): + return f"graph_api_benchmark_{self._runtime.value} SinKernelGraph graphs:{self._with_graphs}, numKernels:{self._num_kernels}" 
+ + def display_name(self) -> str: + return f"{self._runtime.value.upper()} SinKernelGraph, graphs {self._with_graphs}, numKernels {self._num_kernels}" + def explicit_group(self): - return f"SinKernelGraph, numKernels: {self.numKernels}" + return f"SinKernelGraph, numKernels: {self._num_kernels}" def description(self) -> str: - execution = "using graphs" if self.withGraphs else "without graphs" + execution = "using graphs" if self._with_graphs else "without graphs" return ( - f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"Measures {self._runtime.value.upper()} performance when executing {self._num_kernels} " f"sin kernels {execution}. Tests overhead and benefits of graph-based execution." ) - def name(self): - return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" - - def display_name(self) -> str: - return f"{self.runtime.value.upper()} SinKernelGraph, graphs {self.withGraphs}, numKernels {self.numKernels}" - def unstable(self) -> str: return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." def get_tags(self): return [ "graph", - runtime_to_tag_name(self.runtime), + runtime_to_tag_name(self._runtime), "proxy", "submit", "memory", "latency", ] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--numKernels={self.numKernels}", - f"--withGraphs={self.withGraphs}", + f"--numKernels={self._num_kernels}", + f"--withGraphs={self._with_graphs}", "--withCopyOffload=1", "--immediateAppendCmdList=0", ] @@ -972,21 +971,21 @@ def __init__( useHostTasks, profiler_type=PROFILERS.TIMER, ): - self.inOrderQueue = inOrderQueue - self.numKernels = numKernels - self.measureCompletionTime = measureCompletionTime - self.useEvents = useEvents - self.useHostTasks = useHostTasks - self.emulateGraphs = emulate_graphs - self.ioq_str = "in order" if self.inOrderQueue else "out of order" - self.measure_str = ( - " with measure completion" if self.measureCompletionTime else "" + self._in_order_queue = inOrderQueue + self._num_kernels = numKernels + self._measure_completion_time = measureCompletionTime + self._use_events = useEvents + self._use_host_tasks = useHostTasks + self._emulate_graphs = emulate_graphs + self._ioq_str = "in order" if self._in_order_queue else "out of order" + self._measure_str = ( + " with measure completion" if self._measure_completion_time else "" ) - self.use_events_str = f" with events" if self.useEvents else "" - self.host_tasks_str = f" use host tasks" if self.useHostTasks else "" + self._use_events_str = f" with events" if self._use_events else "" + self._host_tasks_str = f" use host tasks" if self._use_host_tasks else "" # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__( bench, f"graph_api_benchmark_{runtime.value}", @@ -995,47 +994,47 @@ def __init__( profiler_type, ) - def supported_runtimes(self) -> list[RUNTIMES]: - return super().supported_runtimes() + [RUNTIMES.SYCL_PREVIEW] + def name(self): + return f"graph_api_benchmark_{self._runtime.value} SubmitGraph{self._use_events_str}{self._host_tasks_str} numKernels:{self._num_kernels} ioq {self._in_order_queue} 
measureCompletion {self._measure_completion_time}{self._cpu_count_str()}" + + def display_name(self) -> str: + return f"{self._runtime.value.upper()} SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}" def explicit_group(self): - return f"SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}" + return f"SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}" def description(self) -> str: return ( - f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"Measures {self._runtime.value.upper()} performance when executing {self._num_kernels} " f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution." ) - def name(self): - return f"graph_api_benchmark_{self.runtime.value} SubmitGraph{self.use_events_str}{self.host_tasks_str} numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}{self.cpu_count_str()}" - - def display_name(self) -> str: - return f"{self.runtime.value.upper()} SubmitGraph {self.ioq_str}{self.measure_str}{self.use_events_str}{self.host_tasks_str}, {self.numKernels} kernels{self.cpu_count_str(separator=',')}" - def get_tags(self): return [ "graph", - runtime_to_tag_name(self.runtime), + runtime_to_tag_name(self._runtime), "micro", "submit", "latency", ] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _supported_runtimes(self) -> list[RUNTIMES]: + return super()._supported_runtimes() + [RUNTIMES.SYCL_PREVIEW] + + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--NumKernels={self.numKernels}", - f"--MeasureCompletionTime={self.measureCompletionTime}", - f"--InOrderQueue={self.inOrderQueue}", + f"--NumKernels={self._num_kernels}", + f"--MeasureCompletionTime={self._measure_completion_time}", + f"--InOrderQueue={self._in_order_queue}", "--Profiling=0", "--KernelExecutionTime=1", - f"--UseEvents={self.useEvents}", + f"--UseEvents={self._use_events}", "--UseExplicit=0", - f"--UseHostTasks={self.useHostTasks}", - f"--profilerType={self.profiler_type.value}", - f"--EmulateGraphs={self.emulateGraphs}", + f"--UseHostTasks={self._use_host_tasks}", + f"--profilerType={self._profiler_type.value}", + f"--EmulateGraphs={self._emulate_graphs}", ] @@ -1043,11 +1042,11 @@ class UllsEmptyKernel(ComputeBenchmark): def __init__( self, bench, runtime: RUNTIMES, wgc, wgs, profiler_type=PROFILERS.TIMER ): - self.wgc = wgc - self.wgs = wgs + self._wgc = wgc + self._wgs = wgs # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__( bench, f"ulls_benchmark_{runtime.value}", @@ -1056,31 +1055,31 @@ def __init__( profiler_type, ) - def supported_runtimes(self) -> list[RUNTIMES]: - return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO] + def name(self): + return f"ulls_benchmark_{self._runtime.value} EmptyKernel wgc:{self._wgc}, wgs:{self._wgs}{self._cpu_count_str()}" + + def display_name(self) -> str: + return f"{self._runtime.value.upper()} EmptyKernel, wgc {self._wgc}, wgs 
{self._wgs}{self._cpu_count_str(separator=',')}" def explicit_group(self): - return f"EmptyKernel, wgc: {self.wgc}, wgs: {self.wgs}{self.cpu_count_str(separator=',')}" + return f"EmptyKernel, wgc: {self._wgc}, wgs: {self._wgs}{self._cpu_count_str(separator=',')}" def description(self) -> str: return "" - def name(self): - return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}{self.cpu_count_str()}" - - def display_name(self) -> str: - return f"{self.runtime.value.upper()} EmptyKernel, wgc {self.wgc}, wgs {self.wgs}{self.cpu_count_str(separator=',')}" - def get_tags(self): - return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] + return [runtime_to_tag_name(self._runtime), "micro", "latency", "submit"] + + def _supported_runtimes(self) -> list[RUNTIMES]: + return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--wgs={self.wgs}", - f"--wgc={self.wgc}", - f"--profilerType={self.profiler_type.value}", + f"--wgs={self._wgs}", + f"--wgc={self._wgc}", + f"--profilerType={self._profiler_type.value}", ] @@ -1096,47 +1095,47 @@ def __init__( ioq, ctrBasedEvents, ): - self.count = count - self.kernelTime = kernelTime - self.barrier = barrier - self.hostVisible = hostVisible - self.ctrBasedEvents = ctrBasedEvents - self.ioq = ioq + self._count = count + self._kernel_time = kernelTime + self._barrier = barrier + self._host_visible = hostVisible + self._ctr_based_events = ctrBasedEvents + self._ioq = ioq # iterations per bin_args: --iterations=1000 - self.iterations_regular = 1000 - self.iterations_trace = 10 + self._iterations_regular = 1000 + self._iterations_trace = 10 super().__init__( bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch", runtime ) - def supported_runtimes(self): - return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO] + def name(self): + return f"ulls_benchmark_{self._runtime.value} KernelSwitch count {self._count} kernelTime {self._kernel_time}" + + def display_name(self) -> str: + return f"{self._runtime.value.upper()} KernelSwitch, count {self._count}, kernelTime {self._kernel_time}" def explicit_group(self): - return f"KernelSwitch, count: {self.count}, kernelTime: {self.kernelTime}" + return f"KernelSwitch, count: {self._count}, kernelTime: {self._kernel_time}" def description(self) -> str: return "" - def name(self): - return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" - - def display_name(self) -> str: - return f"{self.runtime.value.upper()} KernelSwitch, count {self.count}, kernelTime {self.kernelTime}" - def get_tags(self): - return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] + return [runtime_to_tag_name(self._runtime), "micro", "latency", "submit"] + + def _supported_runtimes(self): + return [RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--count={self.count}", - f"--kernelTime={self.kernelTime}", - f"--barrier={self.barrier}", - f"--hostVisible={self.hostVisible}", - f"--ioq={self.ioq}", - f"--ctrBasedEvents={self.ctrBasedEvents}", + 
f"--count={self._count}", + f"--kernelTime={self._kernel_time}", + f"--barrier={self._barrier}", + f"--hostVisible={self._host_visible}", + f"--ioq={self._ioq}", + f"--ctrBasedEvents={self._ctr_based_events}", ] @@ -1144,12 +1143,12 @@ class UsmMemoryAllocation(ComputeBenchmark): def __init__( self, bench, runtime: RUNTIMES, usm_memory_placement, size, measure_mode ): - self.usm_memory_placement = usm_memory_placement - self.size = size - self.measure_mode = measure_mode + self._usm_memory_placement = usm_memory_placement + self._size = size + self._measure_mode = measure_mode # iterations per bin_args: --iterations=10000 - self.iterations_regular = 10000 - self.iterations_trace = 10 + self._iterations_regular = 10000 + self._iterations_trace = 10 super().__init__( bench, f"api_overhead_benchmark_{runtime.value}", @@ -1157,19 +1156,16 @@ def __init__( runtime, ) - def get_tags(self): - return [runtime_to_tag_name(self.runtime), "micro", "latency", "memory"] - def name(self): return ( - f"api_overhead_benchmark_{self.runtime.value} UsmMemoryAllocation " - f"usmMemoryPlacement:{self.usm_memory_placement} size:{self.size} measureMode:{self.measure_mode}" + f"api_overhead_benchmark_{self._runtime.value} UsmMemoryAllocation " + f"usmMemoryPlacement:{self._usm_memory_placement} size:{self._size} measureMode:{self._measure_mode}" ) def display_name(self) -> str: return ( - f"{self.runtime.value.upper()} UsmMemoryAllocation, " - f"usmMemoryPlacement {self.usm_memory_placement}, size {self.size}, measureMode {self.measure_mode}" + f"{self._runtime.value.upper()} UsmMemoryAllocation, " + f"usmMemoryPlacement {self._usm_memory_placement}, size {self._size}, measureMode {self._measure_mode}" ) def explicit_group(self): @@ -1177,23 +1173,26 @@ def explicit_group(self): def description(self) -> str: what_is_measured = "Both memory allocation and memory free are timed" - if self.measure_mode == "Allocate": + if self._measure_mode == "Allocate": what_is_measured = "Only memory allocation is timed" - elif self.measure_mode == "Free": + elif self._measure_mode == "Free": what_is_measured = "Only memory free is timed" return ( - f"Measures memory allocation overhead by allocating {self.size} bytes of " - f"usm {self.usm_memory_placement} memory and free'ing it immediately. " + f"Measures memory allocation overhead by allocating {self._size} bytes of " + f"usm {self._usm_memory_placement} memory and free'ing it immediately. " f"{what_is_measured}. 
" ) - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def get_tags(self): + return [runtime_to_tag_name(self._runtime), "micro", "latency", "memory"] + + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--type={self.usm_memory_placement}", - f"--size={self.size}", - f"--measureMode={self.measure_mode}", + f"--type={self._usm_memory_placement}", + f"--size={self._size}", + f"--measureMode={self._measure_mode}", ] @@ -1207,13 +1206,13 @@ def __init__( size, measure_mode, ): - self.usm_memory_placement = usm_memory_placement - self.allocation_count = allocation_count - self.size = size - self.measure_mode = measure_mode + self._usm_memory_placement = usm_memory_placement + self._allocation_count = allocation_count + self._size = size + self._measure_mode = measure_mode # iterations per bin_args: --iterations=1000 - self.iterations_regular = 1000 - self.iterations_trace = 10 + self._iterations_regular = 1000 + self._iterations_trace = 10 super().__init__( bench, f"api_overhead_benchmark_{runtime.value}", @@ -1221,19 +1220,16 @@ def __init__( runtime, ) - def get_tags(self): - return [runtime_to_tag_name(self.runtime), "micro", "latency", "memory"] - def name(self): return ( - f"api_overhead_benchmark_{self.runtime.value} UsmBatchMemoryAllocation " - f"usmMemoryPlacement:{self.usm_memory_placement} allocationCount:{self.allocation_count} size:{self.size} measureMode:{self.measure_mode}" + f"api_overhead_benchmark_{self._runtime.value} UsmBatchMemoryAllocation " + f"usmMemoryPlacement:{self._usm_memory_placement} allocationCount:{self._allocation_count} size:{self._size} measureMode:{self._measure_mode}" ) def display_name(self) -> str: return ( - f"{self.runtime.value.upper()} UsmBatchMemoryAllocation, " - f"usmMemoryPlacement {self.usm_memory_placement}, allocationCount {self.allocation_count}, size {self.size}, measureMode {self.measure_mode}" + f"{self._runtime.value.upper()} UsmBatchMemoryAllocation, " + f"usmMemoryPlacement {self._usm_memory_placement}, allocationCount {self._allocation_count}, size {self._size}, measureMode {self._measure_mode}" ) def explicit_group(self): @@ -1241,24 +1237,27 @@ def explicit_group(self): def description(self) -> str: what_is_measured = "Both memory allocation and memory free are timed" - if self.measure_mode == "Allocate": + if self._measure_mode == "Allocate": what_is_measured = "Only memory allocation is timed" - elif self.measure_mode == "Free": + elif self._measure_mode == "Free": what_is_measured = "Only memory free is timed" return ( - f"Measures memory allocation overhead by allocating {self.size} bytes of " - f"usm {self.usm_memory_placement} memory {self.allocation_count} times, then free'ing it all at once. " + f"Measures memory allocation overhead by allocating {self._size} bytes of " + f"usm {self._usm_memory_placement} memory {self._allocation_count} times, then free'ing it all at once. " f"{what_is_measured}. 
" ) - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def get_tags(self): + return [runtime_to_tag_name(self._runtime), "micro", "latency", "memory"] + + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--type={self.usm_memory_placement}", - f"--allocationCount={self.allocation_count}", - f"--size={self.size}", - f"--measureMode={self.measure_mode}", + f"--type={self._usm_memory_placement}", + f"--allocationCount={self._allocation_count}", + f"--size={self._size}", + f"--measureMode={self._measure_mode}", ] @@ -1270,14 +1269,14 @@ def __init__( rebuild_graph_every_iteration, graph_structure, ): - self.rebuild_graph_every_iteration = rebuild_graph_every_iteration - self.graph_structure = graph_structure + self._rebuild_graph_every_iteration = rebuild_graph_every_iteration + self._graph_structure = graph_structure # base iterations value mirrors previous behaviour base_iters = 10000 if graph_structure == "Llama": base_iters = base_iters // 10 - self.iterations_regular = base_iters - self.iterations_trace = min(base_iters, 10) + self._iterations_regular = base_iters + self._iterations_trace = min(base_iters, 10) super().__init__( bench, @@ -1286,13 +1285,19 @@ def __init__( runtime, ) + def name(self): + return f"graph_api_benchmark_{self._runtime.value} FinalizeGraph rebuildGraphEveryIter:{self._rebuild_graph_every_iteration} graphStructure:{self._graph_structure}" + + def display_name(self) -> str: + return f"{self._runtime.value.upper()} FinalizeGraph, rebuildGraphEveryIter {self._rebuild_graph_every_iteration}, graphStructure {self._graph_structure}" + def explicit_group(self): - return f"FinalizeGraph, GraphStructure: {self.graph_structure}" + return f"FinalizeGraph, GraphStructure: {self._graph_structure}" def description(self) -> str: what_is_measured = "" - if self.rebuild_graph_every_iteration == 0: + if self._rebuild_graph_every_iteration == 0: what_is_measured = ( "It measures finalizing the same modifiable graph repeatedly " "over multiple iterations." @@ -1304,29 +1309,23 @@ def description(self) -> str: return ( "Measures the time taken to finalize a SYCL graph, using a graph " - f"structure based on the usage of graphs in {self.graph_structure}. " + f"structure based on the usage of graphs in {self._graph_structure}. 
" f"{what_is_measured}" ) - def name(self): - return f"graph_api_benchmark_{self.runtime.value} FinalizeGraph rebuildGraphEveryIter:{self.rebuild_graph_every_iteration} graphStructure:{self.graph_structure}" - - def display_name(self) -> str: - return f"{self.runtime.value.upper()} FinalizeGraph, rebuildGraphEveryIter {self.rebuild_graph_every_iteration}, graphStructure {self.graph_structure}" - def get_tags(self): return [ "graph", - runtime_to_tag_name(self.runtime), + runtime_to_tag_name(self._runtime), "micro", "finalize", "latency", ] - def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: - iters = self.get_iters(run_trace) + def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: + iters = self._get_iters(run_trace) return [ f"--iterations={iters}", - f"--rebuildGraphEveryIter={self.rebuild_graph_every_iteration}", - f"--graphStructure={self.graph_structure}", + f"--rebuildGraphEveryIter={self._rebuild_graph_every_iteration}", + f"--graphStructure={self._graph_structure}", ] diff --git a/devops/scripts/benchmarks/benches/gromacs.py b/devops/scripts/benchmarks/benches/gromacs.py index ad7d35027a0ef..10197b62fa249 100644 --- a/devops/scripts/benchmarks/benches/gromacs.py +++ b/devops/scripts/benchmarks/benches/gromacs.py @@ -97,9 +97,6 @@ def setup(self) -> None: untar=True, ) - def teardown(self): - pass - class GromacsBenchmark(Benchmark): def __init__(self, suite, model, type, option): @@ -278,6 +275,3 @@ def _validate_correctness(self, log_file): ) raise ValueError(f"Conserved Energy Drift not found in log file: {log_file}") - - def teardown(self): - pass diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index b58f26965a6f4..b9cad8c8f2f71 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -183,6 +183,3 @@ def parse_output(self, output): raise ValueError(f"Error parsing output: {e}") return results - - def teardown(self): - return diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index cefe46242b530..2fdfcd8f4123d 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -181,9 +181,6 @@ def run( def name(self): return f"{self.suite.name()} {self.test}" - def teardown(self): - return - # multi benchmarks class Blocked_transform(SyclBenchmark): diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index e3dda9a3a1502..140fd6bed32f3 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -100,6 +100,3 @@ def run( unit="ms", ) ] - - def teardown(self): - return diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 41a8a7fc82073..3ec4980edafb3 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -204,9 +204,6 @@ def parse_output(self, output): return results - def teardown(self): - return - class GBenchPreloaded(GBench): def __init__(self, bench, lib_to_be_replaced, replacing_lib): diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 967d64c754520..c173fb7f95e49 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -182,9 +182,6 @@ def run( ) ] - def teardown(self): - return - class 
Hashtable(VelocityBase): def __init__(self, suite: VelocityBench): diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index de372d7279cc2..572965b98657c 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -337,12 +337,6 @@ def main(directory, additional_env_vars, compare_names, filter): failures[benchmark.name()] = f"Benchmark run failure: {e}" log.error(f"failed: {e}") - for benchmark in benchmarks: - # this never has any useful information anyway, so hide it behind verbose - log.debug(f"tearing down {benchmark.name()}... ") - benchmark.teardown() - log.debug(f"{benchmark.name()} teardown complete.") - this_name = options.current_run_name chart_data = {}