Skip to content

Commit 0f1533a

Browse files
authored
[SYCL] optimize enqueueImpKernel by making trace faster (#20682)
Part of #20668 related to making tracing kernels shorter. Core optimization is based on making checking that traces are disabled as fast & short as possible by - use __builtin_expect indicating common path - putting all code of non-common path in a separate not inlined function (in cpp) - remove "bool ResetCache" parameter and not handling its logic on common path So inlined check is now just "if" and one function call branch which is a small instruction set and moreover should be optimized out by compiler to non-common path of instructions predicted. ``` if (__builtin_expect(SYCLConfigTrace::isTraceInMemCache(), false)) traceKernelImpl(Msg, KernelName, isFastKernelCache); ``` By chance I did some refactor SYCLConfig<SYCL_CACHE_TRACE> is a specialized class but code does not utilize fact that it is a template, that is SyclConfig is never called with generic type which evaluates to SYCL_CACHE_TRACE. Therefore this specialization should be a simple class. This eases coding stuff in it.
1 parent ff5f9b8 commit 0f1533a

File tree

6 files changed

+80
-77
lines changed

6 files changed

+80
-77
lines changed

sycl/source/detail/config.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,36 @@ const std::array<std::pair<std::string, backend>, 8> &getSyclBeMap() {
180180
{"*", backend::all}}};
181181
return SyclBeMap;
182182
}
183+
namespace {
184+
185+
unsigned int parseLevel(const char *ValStr) {
186+
unsigned int intVal = 0;
187+
188+
if (ValStr) {
189+
try {
190+
intVal = std::stoul(ValStr);
191+
} catch (...) {
192+
// If the value is not null and not a number, it is considered
193+
// to enable disk cache tracing. This is the legacy behavior.
194+
intVal = 1;
195+
}
196+
}
197+
198+
// Legacy behavior.
199+
if (intVal > 7)
200+
intVal = 1;
201+
202+
return intVal;
203+
}
204+
205+
} // namespace
206+
207+
void SYCLConfig<SYCL_CACHE_TRACE>::reset() {
208+
Level = parseLevel(BaseT::getRawValue());
209+
}
210+
211+
unsigned int SYCLConfig<SYCL_CACHE_TRACE>::Level =
212+
parseLevel(BaseT::getRawValue());
183213

184214
} // namespace detail
185215
} // namespace _V1

sycl/source/detail/config.hpp

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -714,47 +714,14 @@ template <> class SYCLConfig<SYCL_CACHE_TRACE> {
714714
enum TraceBitmask { DiskCache = 1, InMemCache = 2, KernelCompiler = 4 };
715715

716716
public:
717-
static unsigned int get() { return getCachedValue(); }
718-
static void reset() { (void)getCachedValue(true); }
719-
static bool isTraceDiskCache() {
720-
return getCachedValue() & TraceBitmask::DiskCache;
721-
}
722-
static bool isTraceInMemCache() {
723-
return getCachedValue() & TraceBitmask::InMemCache;
724-
}
725-
static bool isTraceKernelCompiler() {
726-
return getCachedValue() & TraceBitmask::KernelCompiler;
727-
}
717+
static unsigned int get() { return Level; }
718+
static void reset();
719+
static bool isTraceDiskCache() { return Level & DiskCache; }
720+
static bool isTraceInMemCache() { return Level & InMemCache; }
721+
static bool isTraceKernelCompiler() { return Level & KernelCompiler; }
728722

729723
private:
730-
static unsigned int getCachedValue(bool ResetCache = false) {
731-
const auto Parser = []() {
732-
const char *ValStr = BaseT::getRawValue();
733-
int intVal = 0;
734-
735-
if (ValStr) {
736-
try {
737-
intVal = std::stoi(ValStr);
738-
} catch (...) {
739-
// If the value is not null and not a number, it is considered
740-
// to enable disk cache tracing. This is the legacy behavior.
741-
intVal = 1;
742-
}
743-
}
744-
745-
// Legacy behavior.
746-
if (intVal > 7)
747-
intVal = 1;
748-
749-
return intVal;
750-
};
751-
752-
static unsigned int Level = Parser();
753-
if (ResetCache)
754-
Level = Parser();
755-
756-
return Level;
757-
}
724+
static unsigned int Level;
758725
};
759726

760727
// SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD accepts an integer that specifies

sycl/source/detail/kernel_program_cache.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,18 @@
1212
namespace sycl {
1313
inline namespace _V1 {
1414
namespace detail {
15+
16+
void KernelProgramCache::traceKernelImpl(const char *Msg,
17+
KernelNameStrRefT KernelName,
18+
bool IsFastKernelCache) {
19+
std::string Identifier =
20+
"[IsFastCache: " + std::to_string(IsFastKernelCache) +
21+
"][Key:{Name = " + KernelName.data() + "}]: ";
22+
23+
std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
24+
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
25+
}
26+
1527
adapter_impl &KernelProgramCache::getAdapter() {
1628
return MParentContext.getAdapter();
1729
}

sycl/source/detail/kernel_program_cache.hpp

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -361,21 +361,16 @@ class KernelProgramCache {
361361
<< "][Program Cache]" << Identifier << Msg << std::endl;
362362
}
363363

364+
static void traceKernelImpl(const char *Msg, KernelNameStrRefT KernelName,
365+
bool IsFastKernelCache);
366+
364367
// Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is
365368
// set.
366-
template <typename MsgType>
367-
static inline void traceKernel(const MsgType &Msg,
368-
KernelNameStrRefT KernelName,
369-
bool IsFastKernelCache = false) {
370-
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
371-
return;
372-
373-
std::string Identifier =
374-
"[IsFastCache: " + std::to_string(IsFastKernelCache) +
375-
"][Key:{Name = " + KernelName.data() + "}]: ";
376-
377-
std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
378-
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
369+
static void traceKernel(const char *Msg, KernelNameStrRefT KernelName,
370+
bool isFastKernelCache = false) {
371+
if (__builtin_expect(SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache(),
372+
false))
373+
traceKernelImpl(Msg, KernelName, isFastKernelCache);
379374
}
380375

381376
Locked<ProgramCache> acquireCachedPrograms() {
@@ -513,7 +508,8 @@ class KernelProgramCache {
513508
auto LockedCacheKP = acquireKernelsPerProgramCache();
514509
// List kernels that are to be removed from the cache, if tracing is
515510
// enabled.
516-
if (SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache()) {
511+
if (__builtin_expect(SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache(),
512+
false)) {
517513
for (const auto &Kernel : LockedCacheKP.get()[NativePrg])
518514
traceKernel("Kernel evicted.", Kernel.first);
519515
}

sycl/source/detail/persistent_device_code_cache.hpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,19 +222,17 @@ class PersistentDeviceCodeCache {
222222

223223
/* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/
224224
static void trace(const std::string &msg, const std::string &path = "") {
225-
static const bool traceEnabled =
226-
SYCLConfig<SYCL_CACHE_TRACE>::isTraceDiskCache();
227-
if (traceEnabled) {
225+
if (__builtin_expect(SYCLConfig<SYCL_CACHE_TRACE>::isTraceDiskCache(),
226+
false)) {
228227
auto outputPath = path;
229228
std::replace(outputPath.begin(), outputPath.end(), '\\', '/');
230229
std::cerr << "[Persistent Cache]: " << msg << outputPath << std::endl;
231230
}
232231
}
233232
static void trace_KernelCompiler(const std::string &msg,
234233
const std::string &path = "") {
235-
static const bool traceEnabled =
236-
SYCLConfig<SYCL_CACHE_TRACE>::isTraceKernelCompiler();
237-
if (traceEnabled) {
234+
if (__builtin_expect(SYCLConfig<SYCL_CACHE_TRACE>::isTraceKernelCompiler(),
235+
false)) {
238236
auto outputPath = path;
239237
std::replace(outputPath.begin(), outputPath.end(), '\\', '/');
240238
std::cerr << "[kernel_compiler Persistent Cache]: " << msg << outputPath

sycl/unittests/config/ConfigTests.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
2828
File.close();
2929
}
3030
try {
31-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
32-
throw std::logic_error("sycl::exception didn't throw");
31+
sycl::detail::readConfig(true);
32+
throw std::logic_error("sycl::exception didn't throw 1");
3333
} catch (sycl::exception &e) {
3434
EXPECT_EQ(
3535
std::string(
@@ -46,8 +46,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
4646
File.close();
4747
}
4848
try {
49-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
50-
throw std::logic_error("sycl::exception didn't throw");
49+
sycl::detail::readConfig(true);
50+
throw std::logic_error("sycl::exception didn't throw 2");
5151
} catch (sycl::exception &e) {
5252
EXPECT_EQ(
5353
std::string(
@@ -64,8 +64,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
6464
File.close();
6565
}
6666
try {
67-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
68-
throw std::logic_error("sycl::exception didn't throw");
67+
sycl::detail::readConfig(true);
68+
throw std::logic_error("sycl::exception didn't throw 3");
6969
} catch (sycl::exception &e) {
7070
EXPECT_EQ(
7171
std::string(
@@ -82,8 +82,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
8282
File.close();
8383
}
8484
try {
85-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
86-
throw std::logic_error("sycl::exception didn't throw");
85+
sycl::detail::readConfig(true);
86+
throw std::logic_error("sycl::exception didn't throw 4");
8787
} catch (sycl::exception &e) {
8888
EXPECT_EQ(
8989
std::string(
@@ -103,8 +103,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
103103
File.close();
104104
}
105105
try {
106-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
107-
throw std::logic_error("sycl::exception didn't throw");
106+
sycl::detail::readConfig(true);
107+
throw std::logic_error("sycl::exception didn't throw 5");
108108
} catch (sycl::exception &e) {
109109
EXPECT_TRUE(std::regex_match(
110110
e.what(),
@@ -121,8 +121,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
121121
File.close();
122122
}
123123
try {
124-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
125-
throw std::logic_error("sycl::exception didn't throw");
124+
sycl::detail::readConfig(true);
125+
throw std::logic_error("sycl::exception didn't throw 6");
126126
} catch (sycl::exception &e) {
127127
EXPECT_TRUE(std::regex_match(
128128
e.what(), std::regex("Variable name is more than ([\\d]+) or less "
@@ -142,8 +142,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
142142
File.close();
143143
}
144144
try {
145-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
146-
throw std::logic_error("sycl::exception didn't throw");
145+
sycl::detail::readConfig(true);
146+
throw std::logic_error("sycl::exception didn't throw 7");
147147
} catch (sycl::exception &e) {
148148
EXPECT_TRUE(std::regex_match(
149149
e.what(), std::regex("The value contains more than ([\\d]+) characters "
@@ -159,8 +159,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
159159
File.close();
160160
}
161161
try {
162-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
163-
throw std::logic_error("sycl::exception didn't throw");
162+
sycl::detail::readConfig(true);
163+
throw std::logic_error("sycl::exception didn't throw 8");
164164
} catch (sycl::exception &e) {
165165
EXPECT_TRUE(std::regex_match(
166166
e.what(), std::regex("The value contains more than ([\\d]+) characters "
@@ -176,8 +176,8 @@ TEST(ConfigTests, CheckConfigProcessing) {
176176
File.close();
177177
}
178178
try {
179-
sycl::detail::SYCLConfig<sycl::detail::SYCL_DEVICE_ALLOWLIST>::get();
180-
throw std::logic_error("sycl::exception didn't throw");
179+
sycl::detail::readConfig(true);
180+
throw std::logic_error("sycl::exception didn't throw 9");
181181
} catch (sycl::exception &e) {
182182
EXPECT_TRUE(std::regex_match(
183183
e.what(), std::regex("The value contains more than ([\\d]+) characters "

0 commit comments

Comments
 (0)