diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 15aff0c95a154..26eb7c12b6203 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -7609,6 +7609,14 @@ let Visibility = [SYCLRTCOnlyOption] in { : Joined<["--"], "persistent-auto-pch=">, HelpText<"Use Persistent Auto-PCH cache located at for SYCL " "RTC Compilation">; + def sycl_rtc_use_system_includes + : Flag<["--"], "sycl-rtc-use-system-includes">, + HelpText<"Use system includes instead of in-memory libcxx/libc">; + def sycl_rtc_in_memory_fs_only + : Flag<["--"], "sycl-rtc-in-memory-fs-only">, + HelpText<"Disable real filesystem access for SYCL RTC compilation, " + "debugging/testing only">, + Flags<[HelpHidden]>; } // let Group = sycl_rtc_only_Group } // let Visibility = [SYCLRTCOnlyOption] diff --git a/clang/lib/Headers/mm_malloc.h b/clang/lib/Headers/mm_malloc.h index d32fe59416277..86ed5f1930815 100644 --- a/clang/lib/Headers/mm_malloc.h +++ b/clang/lib/Headers/mm_malloc.h @@ -12,7 +12,8 @@ #include -#ifdef _WIN32 +#if defined(_WIN32) && \ + !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__)) #include #else #ifndef __cplusplus @@ -41,7 +42,8 @@ _mm_malloc(size_t __size, size_t __align) { void *__mallocedMemory; #if defined(__MINGW32__) __mallocedMemory = __mingw_aligned_malloc(__size, __align); -#elif defined(_WIN32) +#elif defined(_WIN32) && \ + !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__)) __mallocedMemory = _aligned_malloc(__size, __align); #else if (posix_memalign(&__mallocedMemory, __align, __size)) @@ -56,7 +58,8 @@ _mm_free(void *__p) { #if defined(__MINGW32__) __mingw_aligned_free(__p); -#elif defined(_WIN32) +#elif defined(_WIN32) && \ + !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__)) _aligned_free(__p); #else free(__p); diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 7b52daf9a8062..8224f27a27313 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -859,6 +859,8 @@ foreach(target IN LISTS all_install_header_targets) endforeach() if(LLVM_LIBC_FULL_BUILD) + add_custom_target(generate-libc-headers + DEPENDS libc-headers) add_custom_target(install-libc-headers DEPENDS libc-headers COMMAND "${CMAKE_COMMAND}" diff --git a/libcxx/include/__config b/libcxx/include/__config index 357f77b7d27d6..7d1e997623002 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -274,7 +274,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_MSVCRT_LIKE // If mingw not explicitly detected, assume using MS C runtime only if // a MS compatibility version is specified. -# if defined(_MSC_VER) && !defined(__MINGW32__) +# if defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_LIBCPP_NO_VCRUNTIME) # define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library # endif # if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) @@ -911,7 +911,7 @@ typedef __char32_t char32_t; # endif # if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \ - _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__) + _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__) || defined(__SYCL_DEVICE_ONLY__) # define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE # endif diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index 8c8f00061d1ed..07537491cc26b 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -110,8 +110,9 @@ // } #if _LIBCPP_HAS_LOCALIZATION - -# if defined(__APPLE__) +# if defined(__SYCL_DEVICE_ONLY__) +# include <__locale_dir/support/fuchsia.h> // no_locale +# elif defined(__APPLE__) # include <__locale_dir/support/apple.h> # elif defined(__FreeBSD__) # include <__locale_dir/support/freebsd.h> diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 30ef7f7205dd2..ccc01121588d3 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -39,6 +39,60 @@ set(SYCL_JIT_RESOURCE_DEPS ${SYCL_JIT_RESOURCE_INSTALL_COMPONENTS}) # OpenCL-Headers doesn't have a corresponding build target: list(FILTER SYCL_JIT_RESOURCE_DEPS EXCLUDE REGEX "^OpenCL-Headers$") +# We also want to embed LLVM's libc/libcxx headers into resource. We don't want +# to use them through LLVM_ENABLE_RUNTIMES for a few reasons though: +# * We configure them in a way that might be incompatible with their normal +# usage +# * We don't want to include them in all/install targets +# As such, configure libc/libcxx via explicit `llvm_ExternalProject_Add` in a +# separate location. +set(SYCL_JIT_RUNTIME_PROJECTS "libc;libcxx") +if (NOT WIN32) + list(APPEND SYCL_JIT_RUNTIME_PROJECTS libcxxabi libunwind) +endif() + +# Couldn't pass -DLLVM_ENABLE_RUNTIMES= through CMAKE_ARGS +# below because semicolon is used as a separate for CMAKE_ARGS itself. +# Workaround by passing it through PASSTHROUGH_PREFIXES by saving/restoring that +# variable's original value. +set(SYCL_JIT_LLVM_ENABLE_RUNTIMES_COPY ${LLVM_ENABLE_RUNTIMES}) +set(LLVM_ENABLE_RUNTIMES ${SYCL_JIT_RUNTIME_PROJECTS}) +llvm_ExternalProject_Add(sycl-jit-extra-headers + ${CMAKE_CURRENT_SOURCE_DIR}/../../runtimes + CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off + -DLLVM_INCLUDE_TESTS=Off + -DLLVM_DEFAULT_TARGET_TRIPLE=${LLVM_TARGET_TRIPLE} + -DLLVM_ENABLE_PROJECTS_USED=${LLVM_ENABLE_PROJECTS_USED} + -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR} + -DLLVM_BUILD_TOOLS=${LLVM_BUILD_TOOLS} + -DCMAKE_C_COMPILER_WORKS=ON + -DCMAKE_CXX_COMPILER_WORKS=ON + -DCMAKE_Fortran_COMPILER_WORKS=ON + -DCMAKE_ASM_COMPILER_WORKS=ON + # libc config options: + -DLLVM_LIBC_FULL_BUILD=ON + -DLLVM_LIBC_ALL_HEADERS=1 + -DLIBC_CONFIG_PATH=${CMAKE_CURRENT_SOURCE_DIR}/lib/libc-config + # libcxx config options: + -DLIBCXX_HAS_EXTERNAL_THREAD_API=ON + TARGET_TRIPLE ${LLVM_TARGET_TRIPLE} + USE_TOOLCHAIN + PASSTHROUGH_PREFIXES LLVM_ENABLE_RUNTIMES + EXCLUDE_FROM_ALL + NO_INSTALL + ) +set(LLVM_ENABLE_RUNTIMES ${SYCL_JIT_LLVM_ENABLE_RUNTIMES_COPY}) +list(APPEND SYCL_JIT_RESOURCE_DEPS sycl-jit-extra-headers-configure) +list(APPEND SYCL_JIT_PREPARE_RESOURCE_COMMANDS + # libc + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --target generate-libc-headers + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins/libc/include ${SYCL_JIT_RESOURCE_INSTALL_DIR}/include/libc + + # libcxx + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --target generate-cxx-headers + COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --prefix ${SYCL_JIT_RESOURCE_INSTALL_DIR} --component cxx-headers +) + # This is very hacky and I don't quite know what I'm doing, but it's necessary # to have `resource.cpp` re-generated/re-built when some SYCL header changes. # diff --git a/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt b/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt new file mode 100644 index 0000000000000..05e1f4aa2df8c --- /dev/null +++ b/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt @@ -0,0 +1,5 @@ +if(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") + include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") +else() + include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/entrypoints.txt") +endif() diff --git a/sycl-jit/jit-compiler/lib/libc-config/headers.txt b/sycl-jit/jit-compiler/lib/libc-config/headers.txt new file mode 100644 index 0000000000000..72ae7d85517b9 --- /dev/null +++ b/sycl-jit/jit-compiler/lib/libc-config/headers.txt @@ -0,0 +1 @@ +include("${LIBC_SOURCE_DIR}/config/linux/x86_64/headers.txt") diff --git a/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading new file mode 100644 index 0000000000000..02b522468f138 --- /dev/null +++ b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading @@ -0,0 +1,78 @@ +_LIBCPP_BEGIN_NAMESPACE_STD + +using __libcpp_timespec_t = int; + +// +// Mutex +// +using __libcpp_mutex_t = int; +#define _LIBCPP_MUTEX_INITIALIZER 0 + +using __libcpp_recursive_mutex_t = int; + +int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t*); +int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t*); + +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t*); +int __libcpp_mutex_destroy(__libcpp_mutex_t*); + +// +// Condition Variable +// +using __libcpp_condvar_t = int; +#define _LIBCPP_CONDVAR_INITIALIZER 0 + +int __libcpp_condvar_signal(__libcpp_condvar_t*); +int __libcpp_condvar_broadcast(__libcpp_condvar_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_wait(__libcpp_condvar_t*, __libcpp_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS +int __libcpp_condvar_timedwait(__libcpp_condvar_t*, __libcpp_mutex_t*, __libcpp_timespec_t*); +int __libcpp_condvar_destroy(__libcpp_condvar_t*); + +// +// Execute once +// +using __libcpp_exec_once_flag = int; +#define _LIBCPP_EXEC_ONCE_INITIALIZER 0 + +int __libcpp_execute_once(__libcpp_exec_once_flag*, void (*__init_routine)()); + +// +// Thread id +// +using __libcpp_thread_id = int; + +bool __libcpp_thread_id_equal(__libcpp_thread_id, __libcpp_thread_id); +bool __libcpp_thread_id_less(__libcpp_thread_id, __libcpp_thread_id); + +// +// Thread +// +#define _LIBCPP_NULL_THREAD 0 +using __libcpp_thread_t = int; + +bool __libcpp_thread_isnull(const __libcpp_thread_t*); +int __libcpp_thread_create(__libcpp_thread_t*, void* (*__func)(void*), void* __arg); +__libcpp_thread_id __libcpp_thread_get_current_id(); +__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t*); +int __libcpp_thread_join(__libcpp_thread_t*); +int __libcpp_thread_detach(__libcpp_thread_t*); +void __libcpp_thread_yield(); +void __libcpp_thread_sleep_for(const chrono::nanoseconds&); + +// +// Thread local storage +// +#define _LIBCPP_TLS_DESTRUCTOR_CC 0 +using __libcpp_tls_key = int; + +int __libcpp_tls_create(__libcpp_tls_key*, void (*__at_exit)(void*)); +void* __libcpp_tls_get(__libcpp_tls_key); +int __libcpp_tls_set(__libcpp_tls_key, void*); + +_LIBCPP_END_NAMESPACE_STD diff --git a/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 192e8e76a06cb..4d5c141f1b019 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -262,6 +262,103 @@ class SYCLToolchain { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); } + // Reasons why the following is done here and not in the clang driver: + // + // 1) Unlike libcxx, upstream libc is installed directly into + // `/include` or `//include` together with + // other compiler headers meaning we can't magically turn it on or off + // (unless we introduce a dedicated VFS overlay just for libc). + // 2) Having multiple C libraries in include search paths is unsupported, + // so in order to use LLVM libc we have to remove default system + // includes. That in turn excludes (at the very least) CUDA/HIP SDKs, so + // we want that behavior to be optional. That, in turn, means that + // because of (1) we have to have non-standard libc install location (we + // chose `/include/libc`) and that has no support in the + // clang driver, so we have to add libc headers to system include + // directories manually. + // 3) However, libcxx headers search path must come *before* libc includes, + // but `-isystem` and similar options prepend the list of search paths. + // As such, we can't just have the driver do part of the job and then + // adjust the behavior via extra options, so we need to maintain + // everything on our own. + // 4) We could do everything via custom code in the clang driver, but the + // location of `include/libc` is controlled in this `sycl-jit` project + // and it was slightly more convenient to implement it here, at least + // for the downstream implementation. + // 5) Once we upstream SYCL support there will be a use-case to move libc + // headers installation to a separate directory (similar to libcxx), at + // that time we might have support for this in the clang driver + // directly and would be able to avoid doing that here. + + // Prefer using in-memory as that's friendlier for the end users of SYCL + // applications as that mode doesn't require any C/C++ toolchain to be + // installed on the system. + bool UseInMemoryCxxCHeaders = true; + + // Unless explicitly told not to: + if (UserArgList.hasArg(OPT_sycl_rtc_use_system_includes)) + UseInMemoryCxxCHeaders = false; + + // CUDA/HIP need SDK headers that we can't distribute ourselves, so we have + // to use system includes as well: + if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) + UseInMemoryCxxCHeaders = false; + + if (UseInMemoryCxxCHeaders) { + DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_nostdlibinc)); + auto AddInc = [&](auto RelPath) { + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_isystem), + (getPrefix() + RelPath).str()); + }; + // Must come before C/C++ headers as we're intercepting them in those + // wrappers: + AddInc("include/sycl/stl_wrappers"); + // Extra headers we provide as part of jit-compiler, e.g. + // `__external_threading` and `linux/errno.h` that are needed to make + // LLVM's libc/libcxx work. As far as I know, can be anywhere in the + // includes search path as those files aren't provide anywhere else. + AddInc("include/sycl-rtc-standalone/"); +#if !defined(_WIN32) + // On Windows `LIBCXX_GENERATED_INCLUDE_TARGET_DIR` is off and thus we + // don't need this. + AddInc("include/x86_64-unknown-linux-gnu/c++/v1"); +#endif + // libcxx headers, must come before libc headers: + AddInc("include/c++/v1"); + // libc headers, our (SYCL RTC) custom non-standard location: + AddInc("include/libc"); + // SYCL/SYCL-related headers actually, because `` and not + // just ``. Can be argued that actual installation layout should + // actually be `include/sycl/ur_api.h` and `include/sycl/sycl/sycl.hpp` + // but that's outside the SYCL RTC scope. I think any relative order in + // relation to libcxx/libc is allowed. + AddInc("include/"); + // NOTE: `include/lib/clang//include/` is added automatically (we + // use `--nostdlibinc` and not `--nostdinc`). + + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), + "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES"); +#if defined(_WIN32) + // LLVM's libc implements very limited number of entrypoints on WIN, + // almost to be unusable, so nobody actually cares about using libcxx over + // LLVM libc on that platform. We only use declaration and not definition + // so we force libc to generate more header/entrypoints but it's not + // working well by default. Options below were find by trial-and-error. + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), + "_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), + "_LIBCPP_NO_VCRUNTIME"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_U), "__ELF__"); + +#endif + // Similarly to Windows case above, libcxx over libc isn't fully + // supported upstream, even on Linux. Faced some errors (mostly around + // `_LIBCPP_USING_IF_EXISTS`) if the files below aren't included early: + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "stdio.h"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "wchar.h"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "time.h"); + } + ArgStringList ASL; for (Arg *A : DAL) A->render(DAL, ASL); @@ -543,9 +640,15 @@ class SYCLToolchain { std::vector CommandLine = createCommandLine(UserArgList, Format, SourceFilePath); - auto FS = llvm::makeIntrusiveRefCnt( - llvm::vfs::getRealFileSystem()); - FS->pushOverlay(getToolchainFS()); + llvm::IntrusiveRefCntPtr FS; + if (UserArgList.hasArg(OPT_sycl_rtc_in_memory_fs_only)) { + FS = llvm::makeIntrusiveRefCnt( + getToolchainFS()); + } else { + FS = llvm::makeIntrusiveRefCnt( + llvm::vfs::getRealFileSystem()); + FS->pushOverlay(getToolchainFS()); + } if (FSOverlay) FS->pushOverlay(std::move(FSOverlay)); diff --git a/sycl-jit/jit-compiler/utils/generate.py b/sycl-jit/jit-compiler/utils/generate.py index 449723b8fa268..941d95beb440f 100644 --- a/sycl-jit/jit-compiler/utils/generate.py +++ b/sycl-jit/jit-compiler/utils/generate.py @@ -32,11 +32,11 @@ def main(): const resource_file ToolchainFiles[] = {""" ) - def process_file(file_path): + def process_file(file_path, relative_to): out.write( f""" {{ - {{"{args.prefix}{os.path.relpath(file_path, toolchain_dir).replace(os.sep, "/")}"}} , + {{"{args.prefix}{os.path.relpath(file_path, relative_to).replace(os.sep, "/")}"}} , []() {{ static const char data[] = {{ #embed "{file_path}" if_empty(0) @@ -50,9 +50,17 @@ def process_dir(dir): for root, _, files in os.walk(dir): for file in files: file_path = os.path.join(root, file) - process_file(file_path) + process_file(file_path, dir) process_dir(args.toolchain_dir) + process_dir( + os.path.realpath( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "../lib/resource-includes/", + ) + ) + ) out.write( f""" diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 261fc96f0676f..f822d302bdb92 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -11,6 +11,7 @@ :lang: en :dpcpp: pass:[DPC++] :cpp: pass:[C++] +:libcxx: pass:[libc++] :endnote: —{nbsp}end{nbsp}note // Set the default source code type in this document to C++, @@ -1046,7 +1047,6 @@ sycl::kernel k_float = kb.ext_oneapi_get_kernel("bartmpl"); sycl::kernel k_int = kb.ext_oneapi_get_kernel("bartmpl"); ---- - == Examples === Simple example @@ -1199,6 +1199,44 @@ this extension. == Non-normative implementation notes for {dpcpp} +=== C/{cpp} header files and limitations when the language is sycl + +By default, the SYCL runtime compiler uses a self-contained set of C and {cpp} +header files when compiling kernels in the `sycl` language. This means that SYCL +applications using this feature can be run even on a system that does not have +these headers installed. However, the self contained header files may not be the +same as the C and {cpp} header files that were used to build the host part of +the SYCL application. As a result, there are additional limitations around data +that is shared between the host part of the application and the kernel. These +limitations apply to arguments that are passed to the kernel and also to data +shared through USM or through accessors. Additionally, other header files might +be installed in the same location as system C headers (e.g., `/usr/include/`). +Those will not be available as well. + +Types that are defined by the compiler (e.g. fundamental types like `int` and +`float`) are guaranteed to have the same representation and alignment +requirements in both the host compiler and in the compiler used to compile the +kernel. Therefore data using these types can be safely shared. However, types +defined by the C or {cpp} library (e.g. types in the `std` namespace) are not +guaranteed to be the same, so data defined using these types cannot be safely +shared. There are a few specific exceptions to this limitation. The following C +/ {cpp} types are guaranteed to have the same representation and alignment +requirements, so data defined as these types can be safely shared: + +* The following fixed-width integer types: `int8_t`, `int16_t`, `int32_t`, +`int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`. +* The types `size_t` and `ptrdiff_t`. + +Array and class types defined by your application are safe to share so long as +the element types are safe. Pointer types are safe to share so long as the +pointed-at type is safe. Enumeration types defined by your application are also +safe. + +It is also possible to use the system C and C++ headers instead of the +self-contained versions. See the description of the +`--sycl-rtc-use-system-includes` option for more details. + + === Supported `build_options` when the language is `sycl` The SYCL runtime compiler supports the following {dpcpp} options to be passed in @@ -1351,6 +1389,12 @@ Some notes about the current behavior: using the conflicting hash would proceed without pre-compiled preamble support as if this option wasn't enabled. +==== `--sycl-rtc-use-system-includes` + +Force usage of system C/C++ headers instead of the self-containted versions. +Option has no effect if the target defaults to using system headers by +default. + === Known issues and limitations when the language is `sycl` ==== Changing the compiler action or output diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 0aa48c6992525..a8b6597b3be44 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -27,8 +27,13 @@ extern __DPCPP_SYCL_EXTERNAL_LIBC char *strncpy(char *dest, const char *src, extern __DPCPP_SYCL_EXTERNAL_LIBC int strcmp(const char *s1, const char *s2); extern __DPCPP_SYCL_EXTERNAL_LIBC int strncmp(const char *s1, const char *s2, size_t n); +#ifdef __LLVM_LIBC__ +extern __DPCPP_SYCL_EXTERNAL_LIBC int rand() noexcept; +extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed) noexcept; +#else extern __DPCPP_SYCL_EXTERNAL_LIBC int rand(); extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed); +#endif extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmax(long long int x, long long int y); extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmin(long long int x, diff --git a/sycl/include/sycl/detail/os_util.hpp b/sycl/include/sycl/detail/os_util.hpp index 5fa8e02f1bb7d..31c109c394e4e 100644 --- a/sycl/include/sycl/detail/os_util.hpp +++ b/sycl/include/sycl/detail/os_util.hpp @@ -70,7 +70,10 @@ class __SYCL_EXPORT OSUtil { /// Checks if specified path is present. static bool isPathPresent(const std::string &Path) { -#ifdef __SYCL_RT_OS_WINDOWS +#ifdef __SYCL_DEVICE_ONLY__ + (void)Path; + return false; +#elif defined(__SYCL_RT_OS_WINDOWS) struct _stat Stat; return !_stat(Path.c_str(), &Stat); #else diff --git a/sycl/test-e2e/KernelCompiler/in_memory_only.cpp b/sycl/test-e2e/KernelCompiler/in_memory_only.cpp new file mode 100644 index 0000000000000..601b1072cffce --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/in_memory_only.cpp @@ -0,0 +1,74 @@ +// RUN: %{build} -o %t.out + +// Make sure that debug/test-only option `--sycl-rtc-in-memory-fs-only` works +// RUN: %{run} not %t.out --sycl-rtc-in-memory-fs-only --sycl-rtc-use-system-includes | FileCheck %s --check-prefix CHECK-ERROR +// CHECK-ERROR-LABEL: Device compilation failed +// CHECK-ERROR-NEXT: Detailed information: +// CHECK-ERROR: In file included from rtc_0.cpp:2: +// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/sycl.hpp:38: +// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/detail/core.hpp:21: +// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/accessor.hpp:11: +// CHECK-ERROR-NEXT: {{.*}}/sycl-jit-toolchain//bin/../include/sycl/access/access.hpp:14:10: fatal error: 'type_traits' file not found +// CHECK-ERROR-NEXT: 14 | #include +// CHECK-ERROR-NEXT: | ^~~~~~~~~~~~~ + +// Extra check that our in-memory libcxx/libc headers can really work on a +// system with no C/C++ toolchain: +// RUN: %{run} %t.out --sycl-rtc-in-memory-fs-only + +// CUDA/HIP have SDK dependencies but exclude system includes so those aren't +// satisfied. +// REQUIRES: target-spir + +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +int main(int argc, char *argv[]) { + sycl::queue q; + std::string source = R"""( + #include + namespace syclext = sycl::ext::oneapi; + namespace syclexp = sycl::ext::oneapi::experimental; + + extern "C" + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) + void foo(int *p) { + *p = 42; + } +)"""; + std::vector opts; + + // Without this we see stack overflows on Win, but for some reason only in + // `--sycl-rtc-in-memory-fs-only` mode when it should really be failing + // earlier. + opts.push_back("-fconstexpr-depth=128"); + + for (int i = 1; i < argc; ++i) + opts.emplace_back(argv[i]); + try { + + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, source); + auto kb_exe = syclexp::build( + kb_src, syclexp::properties{syclexp::build_options{opts}}); + sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo"); + auto *p = sycl::malloc_shared(1, q); + q.submit([&](sycl::handler &cgh) { + cgh.set_args(p); + cgh.single_task(krn); + }).wait(); + std::cout << "Result: " << *p << std::endl; + assert(*p == 42); + sycl::free(p, q); + } catch (const sycl::exception &e) { + // Make `CHECK` lines more portable between Lin/Win: + std::string s = e.what(); + std::replace(s.begin(), s.end(), '\\', '/'); + + std::cout << s; + return 1; + } +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp new file mode 100644 index 0000000000000..b1a62f2e1ca3b --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp @@ -0,0 +1,211 @@ +// RUN: %{build} -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define STRINGIFY(x) #x +#define EXPAND_AND_STRINGIFY(x) STRINGIFY(x) + +// Needs to be duplicated between host/device. @{ + +// Comma would make preprocessor macro trickier. +using mint3 = sycl::marray; + +enum E { + V0 = 0x12345689, +}; +static_assert(sizeof(E) == 4); +enum class ScopedE { + ScopedV0 = 0x12345689, +}; +static_assert(sizeof(ScopedE) == 4); + +// }@ + +namespace syclexp = sycl::ext::oneapi::experimental; +int main() { + sycl::queue q; + std::string src = R"""( +#include +#include + +#include +#include +#include +#include +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +using mint3 = sycl::marray; + +enum E { + V0 = 0x12345689, +}; +static_assert(sizeof(E) == 4); +enum class ScopedE { + ScopedV0 = 0x12345689, +}; +static_assert(sizeof(ScopedE) == 4); + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_out) { + *out = TYPE{INIT}; + *align_out = alignof(TYPE); + *size_out = sizeof(TYPE); + auto Equal = [](const auto &lhs, const auto &rhs) { + using T = std::decay_t; + if constexpr (sycl::detail::is_vec_v || sycl::detail::is_marray_v) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); ++i) + if (lhs[i] != rhs[i]) + return false; + + return true; + } else { + return lhs == rhs; + } + }; + *equal_out = Equal(*in, *out); +} +)"""; + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src); + + auto *align = sycl::malloc_shared(1, q); + auto *size = sycl::malloc_shared(1, q); + auto *equal = sycl::malloc_shared(1, q); + + auto Test = [&](auto val, auto type_str, auto init_str) { + using namespace std::literals::string_literals; + + using type = decltype(val); + auto kb_exe = syclexp::build( + kb_src, + syclexp::properties{syclexp::build_options{std::vector{ + "-DTYPE="s + type_str, "-DINIT="s + init_str}}}); + + sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo"); + auto *host = sycl::malloc_shared(1, q); + *host = val; + auto *device = sycl::malloc_shared(1, q); + + q.submit([&](sycl::handler &cgh) { + cgh.set_args(host, device, align, size, equal); + cgh.single_task(krn); + }).wait(); + auto Equal = [](const auto &lhs, const auto &rhs) { + using T = std::decay_t; + if constexpr (sycl::detail::is_vec_v || sycl::detail::is_marray_v) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); ++i) + if (lhs[i] != rhs[i]) + return false; + + return true; + } else { + return lhs == rhs; + } + }; + assert(Equal(*host, *device)); + assert(*align == alignof(type)); + assert(*size == sizeof(type)); + assert(*equal == true); + sycl::free(host, q); + sycl::free(device, q); + }; + +#define TEST(TYPE, INIT) \ + Test(TYPE{INIT}, EXPAND_AND_STRINGIFY(TYPE), EXPAND_AND_STRINGIFY(INIT)); +#define TEST2(TYPE, INIT0, INIT1) \ + Test(TYPE{INIT0, INIT1}, EXPAND_AND_STRINGIFY(TYPE), \ + EXPAND_AND_STRINGIFY(INIT0) ", " EXPAND_AND_STRINGIFY(INIT1)); +#define TEST3(TYPE, INIT0, INIT1, INIT2) \ + Test(TYPE{INIT0, INIT1, INIT2}, EXPAND_AND_STRINGIFY(TYPE), \ + EXPAND_AND_STRINGIFY(INIT0) ", " EXPAND_AND_STRINGIFY( \ + INIT1) ", " EXPAND_AND_STRINGIFY(INIT2)); + + TEST(size_t, 0x1122334455667788) + TEST(char, 0x12) + + TEST(int8_t, 0x12) + TEST(int8_t, -0x12) + TEST(uint8_t, 0x12) + + TEST(int16_t, 0x1234) + TEST(int16_t, -0x1234) + TEST(uint16_t, 0x1234) + + TEST(int32_t, 0x12345678) + TEST(int32_t, -0x12345678) + TEST(uint32_t, 0x12345678) + + TEST(int64_t, 0x1122334455667788) + TEST(int64_t, -0x1122334455667788) + TEST(uint64_t, 0x1122334455667788) + + TEST(size_t, 0x1122334455667788) + TEST(ptrdiff_t, 0x1122334455667788) + + TEST(float, 42.0f) + if (q.get_device().has(sycl::aspect::fp64)) { + TEST(double, 42.0) + } + + TEST(sycl::half, 42.0f) + TEST(sycl::ext::oneapi::bfloat16, 42.0f) + + TEST(sycl::range<1>, 0x1122334455667788) + TEST2(sycl::range<2>, 0x1122334455667788, 0x1223344556677889) + TEST3(sycl::range<3>, 0x1122334455667788, 0x1223344556677889, + 0x132435465768798A) + + TEST(sycl::id<1>, 0x1122334455667788) + TEST2(sycl::id<2>, 0x1122334455667788, 0x1223344556677889) + TEST3(sycl::id<3>, 0x1122334455667788, 0x1223344556677889, 0x132435465768798A) + + // Making these work with macros would be too much work: + Test(sycl::nd_range<1>{{0x1122334455667788}, {0x1223344556677889}}, + "sycl::nd_range<1>", "{0x1122334455667788}, {0x1223344556677889}"); + Test(sycl::nd_range<2>{{0x1122334455667788, 0x2132435465768798}, + {0x1223344556677889, 0x2233445586778899}}, + "sycl::nd_range<2>", + "{0x1122334455667788, 0x2132435465768798}, {0x1223344556677889, " + "0x2233445586778899}"); + Test( + sycl::nd_range<3>{ + {0x1122334455667788, 0x2132435465768798, 0x31525364758697A8}, + {0x1223344556677889, 0x2233445586778899, 0x32435465768798A9}}, + "sycl::nd_range<3>", + "{0x1122334455667788, 0x2132435465768798, 0x31525364758697A8}, " + "{0x1223344556677889, 0x2233445586778899, 0x32435465768798A9}"); + + TEST2(sycl::short2, 0x1234, 0x2345) + TEST3(sycl::short3, 0x1234, 0x2345, 0x3456) + + TEST3(mint3, 0x1234, 0x2345, 0x3456) + + TEST(E, V0) + TEST(ScopedE, ScopedE::ScopedV0) + + sycl::free(align, q); + sycl::free(size, q); + sycl::free(equal, q); +} diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 7f3757fc70624..6d41cff6dc4cd 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 29 +// CHECK-NUM-MATCHES: 30 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see