3 | 3 | # See LICENSE.TXT
4 | 4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 | 5 |
6 | | -from itertools import product |
| 6 | +import copy |
7 | 7 | import csv |
8 | 8 | import io |
9 | | -import copy |
10 | 9 | import math |
11 | 10 | from enum import Enum |
| 11 | +from itertools import product |
12 | 12 | from pathlib import Path |
13 | 13 |
14 | | -from .base import Benchmark, Suite, TracingType |
15 | | -from utils.result import BenchmarkMetadata, Result |
16 | | -from .base import Benchmark, Suite |
17 | | -from options import options |
18 | 14 | from git_project import GitProject |
| 15 | +from options import options |
| 16 | +from utils.result import BenchmarkMetadata, Result |
| 17 | + |
| 18 | +from .base import Benchmark, Suite, TracingType |
19 | 19 |
20 | 20 |
21 | 21 | class RUNTIMES(Enum): |
@@ -100,66 +100,57 @@ def setup(self) -> None: |
100 | 100 |
101 | 101 | def additional_metadata(self) -> dict[str, BenchmarkMetadata]: |
102 | 102 | metadata = { |
103 | | - "SubmitKernel": BenchmarkMetadata( |
104 | | - type="group", |
105 | | - description="Measures CPU time overhead of submitting kernels through different APIs.", |
106 | | - notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n" |
107 | | - "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" |
108 | | - "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" |
109 | | - "Work is ongoing to reduce the overhead of the SYCL API\n", |
110 | | - tags=["submit", "micro", "SYCL", "UR", "L0"], |
111 | | - range_min=0.0, |
112 | | - ), |
113 | 103 | "SinKernelGraph": BenchmarkMetadata( |
114 | 104 | type="group", |
115 | 105 | unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", |
116 | 106 | tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"], |
117 | 107 | ), |
118 | | - "SubmitGraph": BenchmarkMetadata( |
119 | | - type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"] |
120 | | - ), |
121 | 108 | "FinalizeGraph": BenchmarkMetadata( |
122 | 109 | type="group", tags=["finalize", "micro", "SYCL", "graph"] |
123 | 110 | ), |
124 | 111 | } |
125 | 112 |
126 | 113 | # Add metadata for all SubmitKernel group variants |
127 | | - base_metadata = metadata["SubmitKernel"] |
128 | | - |
| 114 | + submit_kernel_metadata = BenchmarkMetadata( |
| 115 | + type="group", |
| 116 | + notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n" |
| 117 | + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" |
| 118 | + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" |
| 119 | + "Work is ongoing to reduce the overhead of the SYCL API\n", |
| 120 | + tags=["submit", "micro", "SYCL", "UR", "L0"], |
| 121 | + range_min=0.0, |
| 122 | + ) |
129 | 123 | for order in ["in order", "out of order"]: |
130 | 124 | for completion in ["", " with completion"]: |
131 | 125 | for events in ["", " using events"]: |
132 | 126 | group_name = f"SubmitKernel {order}{completion}{events} long kernel" |
133 | | - metadata[group_name] = BenchmarkMetadata( |
134 | | - type="group", |
135 | | - description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.", |
136 | | - notes=base_metadata.notes, |
137 | | - tags=base_metadata.tags, |
138 | | - range_min=base_metadata.range_min, |
| 127 | + metadata[group_name] = copy.deepcopy(submit_kernel_metadata) |
| 128 | + metadata[group_name].description = ( |
| 129 | + f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs." |
139 | 130 | ) |
140 | | - |
141 | 131 | # CPU count variants |
142 | 132 | cpu_count_group = f"{group_name}, CPU count" |
143 | | - metadata[cpu_count_group] = BenchmarkMetadata( |
144 | | - type="group", |
145 | | - description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.", |
146 | | - notes=base_metadata.notes, |
147 | | - tags=base_metadata.tags, |
148 | | - range_min=base_metadata.range_min, |
| 133 | + metadata[cpu_count_group] = copy.deepcopy(submit_kernel_metadata) |
| 134 | + metadata[cpu_count_group].description = ( |
| 135 | + f"Measures CPU instruction count overhead of submitting {order} kernels with longer execution times through different APIs." |
149 | 136 | ) |
150 | 137 |
151 | 138 | # Add metadata for all SubmitGraph group variants |
152 | | - base_metadata = metadata["SubmitGraph"] |
| 139 | + submit_graph_metadata = BenchmarkMetadata( |
| 140 | + type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"] |
| 141 | + ) |
153 | 142 | for order in ["in order", "out of order"]: |
154 | 143 | for completion in ["", " with completion"]: |
155 | 144 | for events in ["", " using events"]: |
156 | 145 | for num_kernels in self.submit_graph_num_kernels: |
157 | | - group_name = f"SubmitGraph {order}{completion}{events}, {num_kernels} kernels" |
158 | | - metadata[group_name] = BenchmarkMetadata( |
159 | | - type="group", |
160 | | - tags=base_metadata.tags, |
161 | | - ) |
162 | | - |
| 146 | + for host_tasks in ["", " use host tasks"]: |
| 147 | + group_name = f"SubmitGraph {order}{completion}{events}{host_tasks}, {num_kernels} kernels" |
| 148 | + metadata[group_name] = copy.deepcopy(submit_graph_metadata) |
| 149 | + # CPU count variants |
| 150 | + cpu_count_group = f"{group_name}, CPU count" |
| 151 | + metadata[cpu_count_group] = copy.deepcopy( |
| 152 | + submit_graph_metadata |
| 153 | + ) |
163 | 154 | return metadata |
164 | 155 |
165 | 156 | def benchmarks(self) -> list[Benchmark]: |
@@ -1088,6 +1079,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]: |
1088 | 1079 | bin_args.append(f"--profilerType={self.profiler_type.value}") |
1089 | 1080 | return bin_args |
1090 | 1081 |
| 1082 | + def get_metadata(self) -> dict[str, BenchmarkMetadata]: |
| 1083 | + metadata_dict = super().get_metadata() |
| 1084 | + |
| 1085 | + # Create CPU count variant with modified display name and explicit_group |
| 1086 | + cpu_count_name = self.name() + " CPU count" |
| 1087 | + cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()]) |
| 1088 | + cpu_count_display_name = self.display_name() + ", CPU count" |
| 1089 | + cpu_count_explicit_group = ( |
| 1090 | + self.explicit_group() + ", CPU count" if self.explicit_group() else "" |
| 1091 | + ) |
| 1092 | + cpu_count_metadata.display_name = cpu_count_display_name |
| 1093 | + cpu_count_metadata.explicit_group = cpu_count_explicit_group |
| 1094 | + metadata_dict[cpu_count_name] = cpu_count_metadata |
| 1095 | + |
| 1096 | + return metadata_dict |
| 1097 | + |
1091 | 1098 |
1092 | 1099 | class UllsEmptyKernel(ComputeBenchmark): |
1093 | 1100 | def __init__( |