diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 15aff0c95a154..26eb7c12b6203 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -7609,6 +7609,14 @@ let Visibility = [SYCLRTCOnlyOption] in {
: Joined<["--"], "persistent-auto-pch=">,
HelpText<"Use Persistent Auto-PCH cache located at
for SYCL "
"RTC Compilation">;
+ def sycl_rtc_use_system_includes
+ : Flag<["--"], "sycl-rtc-use-system-includes">,
+ HelpText<"Use system includes instead of in-memory libcxx/libc">;
+ def sycl_rtc_in_memory_fs_only
+ : Flag<["--"], "sycl-rtc-in-memory-fs-only">,
+ HelpText<"Disable real filesystem access for SYCL RTC compilation, "
+ "debugging/testing only">,
+ Flags<[HelpHidden]>;
} // let Group = sycl_rtc_only_Group
} // let Visibility = [SYCLRTCOnlyOption]
diff --git a/clang/lib/Headers/mm_malloc.h b/clang/lib/Headers/mm_malloc.h
index d32fe59416277..86ed5f1930815 100644
--- a/clang/lib/Headers/mm_malloc.h
+++ b/clang/lib/Headers/mm_malloc.h
@@ -12,7 +12,8 @@
#include
-#ifdef _WIN32
+#if defined(_WIN32) && \
+ !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__))
#include
#else
#ifndef __cplusplus
@@ -41,7 +42,8 @@ _mm_malloc(size_t __size, size_t __align) {
void *__mallocedMemory;
#if defined(__MINGW32__)
__mallocedMemory = __mingw_aligned_malloc(__size, __align);
-#elif defined(_WIN32)
+#elif defined(_WIN32) && \
+ !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__))
__mallocedMemory = _aligned_malloc(__size, __align);
#else
if (posix_memalign(&__mallocedMemory, __align, __size))
@@ -56,7 +58,8 @@ _mm_free(void *__p)
{
#if defined(__MINGW32__)
__mingw_aligned_free(__p);
-#elif defined(_WIN32)
+#elif defined(_WIN32) && \
+ !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__))
_aligned_free(__p);
#else
free(__p);
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 7b52daf9a8062..8224f27a27313 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -859,6 +859,8 @@ foreach(target IN LISTS all_install_header_targets)
endforeach()
if(LLVM_LIBC_FULL_BUILD)
+ add_custom_target(generate-libc-headers
+ DEPENDS libc-headers)
add_custom_target(install-libc-headers
DEPENDS libc-headers
COMMAND "${CMAKE_COMMAND}"
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 357f77b7d27d6..7d1e997623002 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -274,7 +274,7 @@ _LIBCPP_HARDENING_MODE_DEBUG
# define _LIBCPP_MSVCRT_LIKE
// If mingw not explicitly detected, assume using MS C runtime only if
// a MS compatibility version is specified.
-# if defined(_MSC_VER) && !defined(__MINGW32__)
+# if defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_LIBCPP_NO_VCRUNTIME)
# define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library
# endif
# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__))
@@ -911,7 +911,7 @@ typedef __char32_t char32_t;
# endif
# if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \
- _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__)
+ _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__) || defined(__SYCL_DEVICE_ONLY__)
# define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
# endif
diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h
index 8c8f00061d1ed..07537491cc26b 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -110,8 +110,9 @@
// }
#if _LIBCPP_HAS_LOCALIZATION
-
-# if defined(__APPLE__)
+# if defined(__SYCL_DEVICE_ONLY__)
+# include <__locale_dir/support/fuchsia.h> // no_locale
+# elif defined(__APPLE__)
# include <__locale_dir/support/apple.h>
# elif defined(__FreeBSD__)
# include <__locale_dir/support/freebsd.h>
diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt
index 30ef7f7205dd2..ccc01121588d3 100644
--- a/sycl-jit/jit-compiler/CMakeLists.txt
+++ b/sycl-jit/jit-compiler/CMakeLists.txt
@@ -39,6 +39,60 @@ set(SYCL_JIT_RESOURCE_DEPS ${SYCL_JIT_RESOURCE_INSTALL_COMPONENTS})
# OpenCL-Headers doesn't have a corresponding build target:
list(FILTER SYCL_JIT_RESOURCE_DEPS EXCLUDE REGEX "^OpenCL-Headers$")
+# We also want to embed LLVM's libc/libcxx headers into resource. We don't want
+# to use them through LLVM_ENABLE_RUNTIMES for a few reasons though:
+# * We configure them in a way that might be incompatible with their normal
+# usage
+# * We don't want to include them in all/install targets
+# As such, configure libc/libcxx via explicit `llvm_ExternalProject_Add` in a
+# separate location.
+set(SYCL_JIT_RUNTIME_PROJECTS "libc;libcxx")
+if (NOT WIN32)
+ list(APPEND SYCL_JIT_RUNTIME_PROJECTS libcxxabi libunwind)
+endif()
+
+# Couldn't pass -DLLVM_ENABLE_RUNTIMES= through CMAKE_ARGS
+# below because semicolon is used as a separate for CMAKE_ARGS itself.
+# Workaround by passing it through PASSTHROUGH_PREFIXES by saving/restoring that
+# variable's original value.
+set(SYCL_JIT_LLVM_ENABLE_RUNTIMES_COPY ${LLVM_ENABLE_RUNTIMES})
+set(LLVM_ENABLE_RUNTIMES ${SYCL_JIT_RUNTIME_PROJECTS})
+llvm_ExternalProject_Add(sycl-jit-extra-headers
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../runtimes
+ CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
+ -DLLVM_INCLUDE_TESTS=Off
+ -DLLVM_DEFAULT_TARGET_TRIPLE=${LLVM_TARGET_TRIPLE}
+ -DLLVM_ENABLE_PROJECTS_USED=${LLVM_ENABLE_PROJECTS_USED}
+ -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR}
+ -DLLVM_BUILD_TOOLS=${LLVM_BUILD_TOOLS}
+ -DCMAKE_C_COMPILER_WORKS=ON
+ -DCMAKE_CXX_COMPILER_WORKS=ON
+ -DCMAKE_Fortran_COMPILER_WORKS=ON
+ -DCMAKE_ASM_COMPILER_WORKS=ON
+ # libc config options:
+ -DLLVM_LIBC_FULL_BUILD=ON
+ -DLLVM_LIBC_ALL_HEADERS=1
+ -DLIBC_CONFIG_PATH=${CMAKE_CURRENT_SOURCE_DIR}/lib/libc-config
+ # libcxx config options:
+ -DLIBCXX_HAS_EXTERNAL_THREAD_API=ON
+ TARGET_TRIPLE ${LLVM_TARGET_TRIPLE}
+ USE_TOOLCHAIN
+ PASSTHROUGH_PREFIXES LLVM_ENABLE_RUNTIMES
+ EXCLUDE_FROM_ALL
+ NO_INSTALL
+ )
+set(LLVM_ENABLE_RUNTIMES ${SYCL_JIT_LLVM_ENABLE_RUNTIMES_COPY})
+list(APPEND SYCL_JIT_RESOURCE_DEPS sycl-jit-extra-headers-configure)
+list(APPEND SYCL_JIT_PREPARE_RESOURCE_COMMANDS
+ # libc
+ COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --target generate-libc-headers
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins/libc/include ${SYCL_JIT_RESOURCE_INSTALL_DIR}/include/libc
+
+ # libcxx
+ COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --target generate-cxx-headers
+ COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --prefix ${SYCL_JIT_RESOURCE_INSTALL_DIR} --component cxx-headers
+)
+
# This is very hacky and I don't quite know what I'm doing, but it's necessary
# to have `resource.cpp` re-generated/re-built when some SYCL header changes.
#
diff --git a/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt b/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt
new file mode 100644
index 0000000000000..05e1f4aa2df8c
--- /dev/null
+++ b/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt
@@ -0,0 +1,5 @@
+if(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt")
+ include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt")
+else()
+ include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/entrypoints.txt")
+endif()
diff --git a/sycl-jit/jit-compiler/lib/libc-config/headers.txt b/sycl-jit/jit-compiler/lib/libc-config/headers.txt
new file mode 100644
index 0000000000000..72ae7d85517b9
--- /dev/null
+++ b/sycl-jit/jit-compiler/lib/libc-config/headers.txt
@@ -0,0 +1 @@
+include("${LIBC_SOURCE_DIR}/config/linux/x86_64/headers.txt")
diff --git a/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading
new file mode 100644
index 0000000000000..02b522468f138
--- /dev/null
+++ b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading
@@ -0,0 +1,78 @@
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+using __libcpp_timespec_t = int;
+
+//
+// Mutex
+//
+using __libcpp_mutex_t = int;
+#define _LIBCPP_MUTEX_INITIALIZER 0
+
+using __libcpp_recursive_mutex_t = int;
+
+int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t*);
+int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t*);
+
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t*);
+int __libcpp_mutex_destroy(__libcpp_mutex_t*);
+
+//
+// Condition Variable
+//
+using __libcpp_condvar_t = int;
+#define _LIBCPP_CONDVAR_INITIALIZER 0
+
+int __libcpp_condvar_signal(__libcpp_condvar_t*);
+int __libcpp_condvar_broadcast(__libcpp_condvar_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_wait(__libcpp_condvar_t*, __libcpp_mutex_t*);
+_LIBCPP_NO_THREAD_SAFETY_ANALYSIS
+int __libcpp_condvar_timedwait(__libcpp_condvar_t*, __libcpp_mutex_t*, __libcpp_timespec_t*);
+int __libcpp_condvar_destroy(__libcpp_condvar_t*);
+
+//
+// Execute once
+//
+using __libcpp_exec_once_flag = int;
+#define _LIBCPP_EXEC_ONCE_INITIALIZER 0
+
+int __libcpp_execute_once(__libcpp_exec_once_flag*, void (*__init_routine)());
+
+//
+// Thread id
+//
+using __libcpp_thread_id = int;
+
+bool __libcpp_thread_id_equal(__libcpp_thread_id, __libcpp_thread_id);
+bool __libcpp_thread_id_less(__libcpp_thread_id, __libcpp_thread_id);
+
+//
+// Thread
+//
+#define _LIBCPP_NULL_THREAD 0
+using __libcpp_thread_t = int;
+
+bool __libcpp_thread_isnull(const __libcpp_thread_t*);
+int __libcpp_thread_create(__libcpp_thread_t*, void* (*__func)(void*), void* __arg);
+__libcpp_thread_id __libcpp_thread_get_current_id();
+__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t*);
+int __libcpp_thread_join(__libcpp_thread_t*);
+int __libcpp_thread_detach(__libcpp_thread_t*);
+void __libcpp_thread_yield();
+void __libcpp_thread_sleep_for(const chrono::nanoseconds&);
+
+//
+// Thread local storage
+//
+#define _LIBCPP_TLS_DESTRUCTOR_CC 0
+using __libcpp_tls_key = int;
+
+int __libcpp_tls_create(__libcpp_tls_key*, void (*__at_exit)(void*));
+void* __libcpp_tls_get(__libcpp_tls_key);
+int __libcpp_tls_set(__libcpp_tls_key, void*);
+
+_LIBCPP_END_NAMESPACE_STD
diff --git a/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
index 192e8e76a06cb..4d5c141f1b019 100644
--- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
+++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
@@ -262,6 +262,103 @@ class SYCLToolchain {
DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU);
}
+ // Reasons why the following is done here and not in the clang driver:
+ //
+ // 1) Unlike libcxx, upstream libc is installed directly into
+ // `/include` or `//include` together with
+ // other compiler headers meaning we can't magically turn it on or off
+ // (unless we introduce a dedicated VFS overlay just for libc).
+ // 2) Having multiple C libraries in include search paths is unsupported,
+ // so in order to use LLVM libc we have to remove default system
+ // includes. That in turn excludes (at the very least) CUDA/HIP SDKs, so
+ // we want that behavior to be optional. That, in turn, means that
+ // because of (1) we have to have non-standard libc install location (we
+ // chose `/include/libc`) and that has no support in the
+ // clang driver, so we have to add libc headers to system include
+ // directories manually.
+ // 3) However, libcxx headers search path must come *before* libc includes,
+ // but `-isystem` and similar options prepend the list of search paths.
+ // As such, we can't just have the driver do part of the job and then
+ // adjust the behavior via extra options, so we need to maintain
+ // everything on our own.
+ // 4) We could do everything via custom code in the clang driver, but the
+ // location of `include/libc` is controlled in this `sycl-jit` project
+ // and it was slightly more convenient to implement it here, at least
+ // for the downstream implementation.
+ // 5) Once we upstream SYCL support there will be a use-case to move libc
+ // headers installation to a separate directory (similar to libcxx), at
+ // that time we might have support for this in the clang driver
+ // directly and would be able to avoid doing that here.
+
+ // Prefer using in-memory as that's friendlier for the end users of SYCL
+ // applications as that mode doesn't require any C/C++ toolchain to be
+ // installed on the system.
+ bool UseInMemoryCxxCHeaders = true;
+
+ // Unless explicitly told not to:
+ if (UserArgList.hasArg(OPT_sycl_rtc_use_system_includes))
+ UseInMemoryCxxCHeaders = false;
+
+ // CUDA/HIP need SDK headers that we can't distribute ourselves, so we have
+ // to use system includes as well:
+ if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN)
+ UseInMemoryCxxCHeaders = false;
+
+ if (UseInMemoryCxxCHeaders) {
+ DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_nostdlibinc));
+ auto AddInc = [&](auto RelPath) {
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_isystem),
+ (getPrefix() + RelPath).str());
+ };
+ // Must come before C/C++ headers as we're intercepting them in those
+ // wrappers:
+ AddInc("include/sycl/stl_wrappers");
+ // Extra headers we provide as part of jit-compiler, e.g.
+ // `__external_threading` and `linux/errno.h` that are needed to make
+ // LLVM's libc/libcxx work. As far as I know, can be anywhere in the
+ // includes search path as those files aren't provide anywhere else.
+ AddInc("include/sycl-rtc-standalone/");
+#if !defined(_WIN32)
+ // On Windows `LIBCXX_GENERATED_INCLUDE_TARGET_DIR` is off and thus we
+ // don't need this.
+ AddInc("include/x86_64-unknown-linux-gnu/c++/v1");
+#endif
+ // libcxx headers, must come before libc headers:
+ AddInc("include/c++/v1");
+ // libc headers, our (SYCL RTC) custom non-standard location:
+ AddInc("include/libc");
+ // SYCL/SYCL-related headers actually, because `` and not
+ // just ``. Can be argued that actual installation layout should
+ // actually be `include/sycl/ur_api.h` and `include/sycl/sycl/sycl.hpp`
+ // but that's outside the SYCL RTC scope. I think any relative order in
+ // relation to libcxx/libc is allowed.
+ AddInc("include/");
+ // NOTE: `include/lib/clang//include/` is added automatically (we
+ // use `--nostdlibinc` and not `--nostdinc`).
+
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D),
+ "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES");
+#if defined(_WIN32)
+ // LLVM's libc implements very limited number of entrypoints on WIN,
+ // almost to be unusable, so nobody actually cares about using libcxx over
+ // LLVM libc on that platform. We only use declaration and not definition
+ // so we force libc to generate more header/entrypoints but it's not
+ // working well by default. Options below were find by trial-and-error.
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D),
+ "_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS");
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D),
+ "_LIBCPP_NO_VCRUNTIME");
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_U), "__ELF__");
+
+#endif
+ // Similarly to Windows case above, libcxx over libc isn't fully
+ // supported upstream, even on Linux. Faced some errors (mostly around
+ // `_LIBCPP_USING_IF_EXISTS`) if the files below aren't included early:
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "stdio.h");
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "wchar.h");
+ DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "time.h");
+ }
+
ArgStringList ASL;
for (Arg *A : DAL)
A->render(DAL, ASL);
@@ -543,9 +640,15 @@ class SYCLToolchain {
std::vector CommandLine =
createCommandLine(UserArgList, Format, SourceFilePath);
- auto FS = llvm::makeIntrusiveRefCnt(
- llvm::vfs::getRealFileSystem());
- FS->pushOverlay(getToolchainFS());
+ llvm::IntrusiveRefCntPtr FS;
+ if (UserArgList.hasArg(OPT_sycl_rtc_in_memory_fs_only)) {
+ FS = llvm::makeIntrusiveRefCnt(
+ getToolchainFS());
+ } else {
+ FS = llvm::makeIntrusiveRefCnt(
+ llvm::vfs::getRealFileSystem());
+ FS->pushOverlay(getToolchainFS());
+ }
if (FSOverlay)
FS->pushOverlay(std::move(FSOverlay));
diff --git a/sycl-jit/jit-compiler/utils/generate.py b/sycl-jit/jit-compiler/utils/generate.py
index 449723b8fa268..941d95beb440f 100644
--- a/sycl-jit/jit-compiler/utils/generate.py
+++ b/sycl-jit/jit-compiler/utils/generate.py
@@ -32,11 +32,11 @@ def main():
const resource_file ToolchainFiles[] = {"""
)
- def process_file(file_path):
+ def process_file(file_path, relative_to):
out.write(
f"""
{{
- {{"{args.prefix}{os.path.relpath(file_path, toolchain_dir).replace(os.sep, "/")}"}} ,
+ {{"{args.prefix}{os.path.relpath(file_path, relative_to).replace(os.sep, "/")}"}} ,
[]() {{
static const char data[] = {{
#embed "{file_path}" if_empty(0)
@@ -50,9 +50,17 @@ def process_dir(dir):
for root, _, files in os.walk(dir):
for file in files:
file_path = os.path.join(root, file)
- process_file(file_path)
+ process_file(file_path, dir)
process_dir(args.toolchain_dir)
+ process_dir(
+ os.path.realpath(
+ os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ "../lib/resource-includes/",
+ )
+ )
+ )
out.write(
f"""
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc
index 261fc96f0676f..f822d302bdb92 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc
@@ -11,6 +11,7 @@
:lang: en
:dpcpp: pass:[DPC++]
:cpp: pass:[C++]
+:libcxx: pass:[libc++]
:endnote: —{nbsp}end{nbsp}note
// Set the default source code type in this document to C++,
@@ -1046,7 +1047,6 @@ sycl::kernel k_float = kb.ext_oneapi_get_kernel("bartmpl");
sycl::kernel k_int = kb.ext_oneapi_get_kernel("bartmpl");
----
-
== Examples
=== Simple example
@@ -1199,6 +1199,44 @@ this extension.
== Non-normative implementation notes for {dpcpp}
+=== C/{cpp} header files and limitations when the language is sycl
+
+By default, the SYCL runtime compiler uses a self-contained set of C and {cpp}
+header files when compiling kernels in the `sycl` language. This means that SYCL
+applications using this feature can be run even on a system that does not have
+these headers installed. However, the self contained header files may not be the
+same as the C and {cpp} header files that were used to build the host part of
+the SYCL application. As a result, there are additional limitations around data
+that is shared between the host part of the application and the kernel. These
+limitations apply to arguments that are passed to the kernel and also to data
+shared through USM or through accessors. Additionally, other header files might
+be installed in the same location as system C headers (e.g., `/usr/include/`).
+Those will not be available as well.
+
+Types that are defined by the compiler (e.g. fundamental types like `int` and
+`float`) are guaranteed to have the same representation and alignment
+requirements in both the host compiler and in the compiler used to compile the
+kernel. Therefore data using these types can be safely shared. However, types
+defined by the C or {cpp} library (e.g. types in the `std` namespace) are not
+guaranteed to be the same, so data defined using these types cannot be safely
+shared. There are a few specific exceptions to this limitation. The following C
+/ {cpp} types are guaranteed to have the same representation and alignment
+requirements, so data defined as these types can be safely shared:
+
+* The following fixed-width integer types: `int8_t`, `int16_t`, `int32_t`,
+`int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`.
+* The types `size_t` and `ptrdiff_t`.
+
+Array and class types defined by your application are safe to share so long as
+the element types are safe. Pointer types are safe to share so long as the
+pointed-at type is safe. Enumeration types defined by your application are also
+safe.
+
+It is also possible to use the system C and C++ headers instead of the
+self-contained versions. See the description of the
+`--sycl-rtc-use-system-includes` option for more details.
+
+
=== Supported `build_options` when the language is `sycl`
The SYCL runtime compiler supports the following {dpcpp} options to be passed in
@@ -1351,6 +1389,12 @@ Some notes about the current behavior:
using the conflicting hash would proceed without pre-compiled preamble support
as if this option wasn't enabled.
+==== `--sycl-rtc-use-system-includes`
+
+Force usage of system C/C++ headers instead of the self-containted versions.
+Option has no effect if the target defaults to using system headers by
+default.
+
=== Known issues and limitations when the language is `sycl`
==== Changing the compiler action or output
diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp
index 0aa48c6992525..a8b6597b3be44 100644
--- a/sycl/include/sycl/builtins.hpp
+++ b/sycl/include/sycl/builtins.hpp
@@ -27,8 +27,13 @@ extern __DPCPP_SYCL_EXTERNAL_LIBC char *strncpy(char *dest, const char *src,
extern __DPCPP_SYCL_EXTERNAL_LIBC int strcmp(const char *s1, const char *s2);
extern __DPCPP_SYCL_EXTERNAL_LIBC int strncmp(const char *s1, const char *s2,
size_t n);
+#ifdef __LLVM_LIBC__
+extern __DPCPP_SYCL_EXTERNAL_LIBC int rand() noexcept;
+extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed) noexcept;
+#else
extern __DPCPP_SYCL_EXTERNAL_LIBC int rand();
extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed);
+#endif
extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmax(long long int x,
long long int y);
extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmin(long long int x,
diff --git a/sycl/include/sycl/detail/os_util.hpp b/sycl/include/sycl/detail/os_util.hpp
index 5fa8e02f1bb7d..31c109c394e4e 100644
--- a/sycl/include/sycl/detail/os_util.hpp
+++ b/sycl/include/sycl/detail/os_util.hpp
@@ -70,7 +70,10 @@ class __SYCL_EXPORT OSUtil {
/// Checks if specified path is present.
static bool isPathPresent(const std::string &Path) {
-#ifdef __SYCL_RT_OS_WINDOWS
+#ifdef __SYCL_DEVICE_ONLY__
+ (void)Path;
+ return false;
+#elif defined(__SYCL_RT_OS_WINDOWS)
struct _stat Stat;
return !_stat(Path.c_str(), &Stat);
#else
diff --git a/sycl/test-e2e/KernelCompiler/in_memory_only.cpp b/sycl/test-e2e/KernelCompiler/in_memory_only.cpp
new file mode 100644
index 0000000000000..601b1072cffce
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/in_memory_only.cpp
@@ -0,0 +1,74 @@
+// RUN: %{build} -o %t.out
+
+// Make sure that debug/test-only option `--sycl-rtc-in-memory-fs-only` works
+// RUN: %{run} not %t.out --sycl-rtc-in-memory-fs-only --sycl-rtc-use-system-includes | FileCheck %s --check-prefix CHECK-ERROR
+// CHECK-ERROR-LABEL: Device compilation failed
+// CHECK-ERROR-NEXT: Detailed information:
+// CHECK-ERROR: In file included from rtc_0.cpp:2:
+// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/sycl.hpp:38:
+// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/detail/core.hpp:21:
+// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/accessor.hpp:11:
+// CHECK-ERROR-NEXT: {{.*}}/sycl-jit-toolchain//bin/../include/sycl/access/access.hpp:14:10: fatal error: 'type_traits' file not found
+// CHECK-ERROR-NEXT: 14 | #include
+// CHECK-ERROR-NEXT: | ^~~~~~~~~~~~~
+
+// Extra check that our in-memory libcxx/libc headers can really work on a
+// system with no C/C++ toolchain:
+// RUN: %{run} %t.out --sycl-rtc-in-memory-fs-only
+
+// CUDA/HIP have SDK dependencies but exclude system includes so those aren't
+// satisfied.
+// REQUIRES: target-spir
+
+#include
+#include
+#include
+
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+int main(int argc, char *argv[]) {
+ sycl::queue q;
+ std::string source = R"""(
+ #include
+ namespace syclext = sycl::ext::oneapi;
+ namespace syclexp = sycl::ext::oneapi::experimental;
+
+ extern "C"
+ SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
+ void foo(int *p) {
+ *p = 42;
+ }
+)""";
+ std::vector opts;
+
+ // Without this we see stack overflows on Win, but for some reason only in
+ // `--sycl-rtc-in-memory-fs-only` mode when it should really be failing
+ // earlier.
+ opts.push_back("-fconstexpr-depth=128");
+
+ for (int i = 1; i < argc; ++i)
+ opts.emplace_back(argv[i]);
+ try {
+
+ auto kb_src = syclexp::create_kernel_bundle_from_source(
+ q.get_context(), syclexp::source_language::sycl, source);
+ auto kb_exe = syclexp::build(
+ kb_src, syclexp::properties{syclexp::build_options{opts}});
+ sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo");
+ auto *p = sycl::malloc_shared(1, q);
+ q.submit([&](sycl::handler &cgh) {
+ cgh.set_args(p);
+ cgh.single_task(krn);
+ }).wait();
+ std::cout << "Result: " << *p << std::endl;
+ assert(*p == 42);
+ sycl::free(p, q);
+ } catch (const sycl::exception &e) {
+ // Make `CHECK` lines more portable between Lin/Win:
+ std::string s = e.what();
+ std::replace(s.begin(), s.end(), '\\', '/');
+
+ std::cout << s;
+ return 1;
+ }
+}
diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp
new file mode 100644
index 0000000000000..b1a62f2e1ca3b
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp
@@ -0,0 +1,211 @@
+// RUN: %{build} -o %t.out
+// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out
+
+// UNSUPPORTED: target-native_cpu
+// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define STRINGIFY(x) #x
+#define EXPAND_AND_STRINGIFY(x) STRINGIFY(x)
+
+// Needs to be duplicated between host/device. @{
+
+// Comma would make preprocessor macro trickier.
+using mint3 = sycl::marray;
+
+enum E {
+ V0 = 0x12345689,
+};
+static_assert(sizeof(E) == 4);
+enum class ScopedE {
+ ScopedV0 = 0x12345689,
+};
+static_assert(sizeof(ScopedE) == 4);
+
+// }@
+
+namespace syclexp = sycl::ext::oneapi::experimental;
+int main() {
+ sycl::queue q;
+ std::string src = R"""(
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace syclext = sycl::ext::oneapi;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+using mint3 = sycl::marray;
+
+enum E {
+ V0 = 0x12345689,
+};
+static_assert(sizeof(E) == 4);
+enum class ScopedE {
+ ScopedV0 = 0x12345689,
+};
+static_assert(sizeof(ScopedE) == 4);
+
+extern "C"
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
+void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_out) {
+ *out = TYPE{INIT};
+ *align_out = alignof(TYPE);
+ *size_out = sizeof(TYPE);
+ auto Equal = [](const auto &lhs, const auto &rhs) {
+ using T = std::decay_t;
+ if constexpr (sycl::detail::is_vec_v || sycl::detail::is_marray_v) {
+ if (lhs.size() != rhs.size())
+ return false;
+
+ for (size_t i = 0; i < lhs.size(); ++i)
+ if (lhs[i] != rhs[i])
+ return false;
+
+ return true;
+ } else {
+ return lhs == rhs;
+ }
+ };
+ *equal_out = Equal(*in, *out);
+}
+)""";
+ auto kb_src = syclexp::create_kernel_bundle_from_source(
+ q.get_context(), syclexp::source_language::sycl, src);
+
+ auto *align = sycl::malloc_shared(1, q);
+ auto *size = sycl::malloc_shared(1, q);
+ auto *equal = sycl::malloc_shared(1, q);
+
+ auto Test = [&](auto val, auto type_str, auto init_str) {
+ using namespace std::literals::string_literals;
+
+ using type = decltype(val);
+ auto kb_exe = syclexp::build(
+ kb_src,
+ syclexp::properties{syclexp::build_options{std::vector{
+ "-DTYPE="s + type_str, "-DINIT="s + init_str}}});
+
+ sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo");
+ auto *host = sycl::malloc_shared(1, q);
+ *host = val;
+ auto *device = sycl::malloc_shared(1, q);
+
+ q.submit([&](sycl::handler &cgh) {
+ cgh.set_args(host, device, align, size, equal);
+ cgh.single_task(krn);
+ }).wait();
+ auto Equal = [](const auto &lhs, const auto &rhs) {
+ using T = std::decay_t;
+ if constexpr (sycl::detail::is_vec_v || sycl::detail::is_marray_v) {
+ if (lhs.size() != rhs.size())
+ return false;
+
+ for (size_t i = 0; i < lhs.size(); ++i)
+ if (lhs[i] != rhs[i])
+ return false;
+
+ return true;
+ } else {
+ return lhs == rhs;
+ }
+ };
+ assert(Equal(*host, *device));
+ assert(*align == alignof(type));
+ assert(*size == sizeof(type));
+ assert(*equal == true);
+ sycl::free(host, q);
+ sycl::free(device, q);
+ };
+
+#define TEST(TYPE, INIT) \
+ Test(TYPE{INIT}, EXPAND_AND_STRINGIFY(TYPE), EXPAND_AND_STRINGIFY(INIT));
+#define TEST2(TYPE, INIT0, INIT1) \
+ Test(TYPE{INIT0, INIT1}, EXPAND_AND_STRINGIFY(TYPE), \
+ EXPAND_AND_STRINGIFY(INIT0) ", " EXPAND_AND_STRINGIFY(INIT1));
+#define TEST3(TYPE, INIT0, INIT1, INIT2) \
+ Test(TYPE{INIT0, INIT1, INIT2}, EXPAND_AND_STRINGIFY(TYPE), \
+ EXPAND_AND_STRINGIFY(INIT0) ", " EXPAND_AND_STRINGIFY( \
+ INIT1) ", " EXPAND_AND_STRINGIFY(INIT2));
+
+ TEST(size_t, 0x1122334455667788)
+ TEST(char, 0x12)
+
+ TEST(int8_t, 0x12)
+ TEST(int8_t, -0x12)
+ TEST(uint8_t, 0x12)
+
+ TEST(int16_t, 0x1234)
+ TEST(int16_t, -0x1234)
+ TEST(uint16_t, 0x1234)
+
+ TEST(int32_t, 0x12345678)
+ TEST(int32_t, -0x12345678)
+ TEST(uint32_t, 0x12345678)
+
+ TEST(int64_t, 0x1122334455667788)
+ TEST(int64_t, -0x1122334455667788)
+ TEST(uint64_t, 0x1122334455667788)
+
+ TEST(size_t, 0x1122334455667788)
+ TEST(ptrdiff_t, 0x1122334455667788)
+
+ TEST(float, 42.0f)
+ if (q.get_device().has(sycl::aspect::fp64)) {
+ TEST(double, 42.0)
+ }
+
+ TEST(sycl::half, 42.0f)
+ TEST(sycl::ext::oneapi::bfloat16, 42.0f)
+
+ TEST(sycl::range<1>, 0x1122334455667788)
+ TEST2(sycl::range<2>, 0x1122334455667788, 0x1223344556677889)
+ TEST3(sycl::range<3>, 0x1122334455667788, 0x1223344556677889,
+ 0x132435465768798A)
+
+ TEST(sycl::id<1>, 0x1122334455667788)
+ TEST2(sycl::id<2>, 0x1122334455667788, 0x1223344556677889)
+ TEST3(sycl::id<3>, 0x1122334455667788, 0x1223344556677889, 0x132435465768798A)
+
+ // Making these work with macros would be too much work:
+ Test(sycl::nd_range<1>{{0x1122334455667788}, {0x1223344556677889}},
+ "sycl::nd_range<1>", "{0x1122334455667788}, {0x1223344556677889}");
+ Test(sycl::nd_range<2>{{0x1122334455667788, 0x2132435465768798},
+ {0x1223344556677889, 0x2233445586778899}},
+ "sycl::nd_range<2>",
+ "{0x1122334455667788, 0x2132435465768798}, {0x1223344556677889, "
+ "0x2233445586778899}");
+ Test(
+ sycl::nd_range<3>{
+ {0x1122334455667788, 0x2132435465768798, 0x31525364758697A8},
+ {0x1223344556677889, 0x2233445586778899, 0x32435465768798A9}},
+ "sycl::nd_range<3>",
+ "{0x1122334455667788, 0x2132435465768798, 0x31525364758697A8}, "
+ "{0x1223344556677889, 0x2233445586778899, 0x32435465768798A9}");
+
+ TEST2(sycl::short2, 0x1234, 0x2345)
+ TEST3(sycl::short3, 0x1234, 0x2345, 0x3456)
+
+ TEST3(mint3, 0x1234, 0x2345, 0x3456)
+
+ TEST(E, V0)
+ TEST(ScopedE, ScopedE::ScopedV0)
+
+ sycl::free(align, q);
+ sycl::free(size, q);
+ sycl::free(equal, q);
+}
diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp
index 7f3757fc70624..6d41cff6dc4cd 100644
--- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp
+++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp
@@ -6,7 +6,7 @@
// CHECK-DAG: README.md
// CHECK-DAG: lit.cfg.py
//
-// CHECK-NUM-MATCHES: 29
+// CHECK-NUM-MATCHES: 30
//
// This test verifies that `` isn't used in E2E tests. Instead,
// fine-grained includes should used, see