From 0416bb5e67f1cc12b4da5e9b152c70b8c89a8cbc Mon Sep 17 00:00:00 2001 From: CrazyDragon Date: Fri, 30 May 2025 17:13:16 +0300 Subject: [PATCH] Refactor CMake configuration, enhance cross-platform support, and improve file handling --- .gitignore | 3 ++ CMakeLists.txt | 4 +-- genbundle.cmake | 60 ++++++++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 68 +++++++++++++++++++++++++++------------------- src/FFTConfig.cpp | 15 ++++++---- src/File.cpp | 2 +- src/File.h | 48 +++++++++++++++++++------------- src/Gpu.h | 3 +- src/common.h | 6 ++++ src/main.cpp | 8 +++++- src/tune.cpp | 6 ++-- 11 files changed, 161 insertions(+), 62 deletions(-) create mode 100644 genbundle.cmake diff --git a/.gitignore b/.gitignore index 782a62ef..5913754e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,6 @@ txt/ bug/ prpll-debug prpll-release +build/ +.vscode/ +.vs/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 1dd391b0..5d9611f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.16) project(gpuowl LANGUAGES CXX) @@ -6,4 +6,4 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -add_subdirectory(src) +add_subdirectory(src) \ No newline at end of file diff --git a/genbundle.cmake b/genbundle.cmake new file mode 100644 index 00000000..b22d02eb --- /dev/null +++ b/genbundle.cmake @@ -0,0 +1,60 @@ +# genbundle.sh for crossplatform cmake +separate_arguments(CL_SOURCES) +message("CL_SOURCES = ${CL_SOURCES}") + +file(WRITE "${OUTPUT}" "// bundle.cpp generated with genbundle.cmake\n") +file(APPEND "${OUTPUT}" "// Required CMAKE 3.16+ (for windows MSVC 17.0+)\n") +file(APPEND "${OUTPUT}" "// Works with Windows 10/11 | Linux/UNIX\n") +file(APPEND "${OUTPUT}" "// by CrazyDragon(GoodDrakon4ik)\n") +file(APPEND "${OUTPUT}" "// Copyright (C) Mihai Preda\n") +file(APPEND "${OUTPUT}" "// Generated file, do not edit. 
See genbundle.cmake and src/cl/*.cl\n\n") +file(APPEND "${OUTPUT}" "#include \n") +file(APPEND "${OUTPUT}" "static const std::vector CL_FILES{\n") + +set(names "") +set(file_count 0) +foreach(cl_file IN LISTS CL_SOURCES) + get_filename_component(fname "${cl_file}" NAME) + file(READ "${cl_file}" cl_content) + + set(chunk_size 15000) + string(LENGTH "${cl_content}" len) + math(EXPR num_chunks "${len} / ${chunk_size} + 1") + message(STATUS "File: ${cl_file} | Lenght: ${len} | Needed chunks = ${num_chunks}") + + file(APPEND "${OUTPUT}" "// ${cl_file}\n") + + set(pos 0) + foreach(i RANGE 0 ${num_chunks}) + math(EXPR start_pos "${i} * ${chunk_size}") + math(EXPR remaning "${len} - ${start_pos}") + if (remaning LESS 1) + break() + endif() + if (remaning GREATER ${chunk_size}) + math(EXPR this_size "${chunk_size}") + else() + set(this_size "${remaning}") + endif() + string(SUBSTRING "${cl_content}" ${start_pos} ${this_size} chunk) + if (i EQUAL 0) + file(APPEND "${OUTPUT}" "R\"cltag(${chunk})cltag\"") + else() + file(APPEND "${OUTPUT}" "\n R\"cltag(${chunk})cltag\"") + endif() + message(STATUS "Chunk: ${i} - generated.") + endforeach() + math(EXPR file_count "${file_count} + 1") + list(LENGTH CL_SOURCES total_files) + if (NOT file_count EQUAL total_files) + file(APPEND "${OUTPUT}" ",\n") + else() + file(APPEND "${OUTPUT}" "\n") + endif() + set(names "${names}\"${fname}\",") +endforeach() + +file(APPEND "${OUTPUT}" "};\n\n") +file(APPEND "${OUTPUT}" "static const std::vector CL_FILE_NAMES{${names}};\n\n") +file(APPEND "${OUTPUT}" "const std::vector& getClFileNames() { return CL_FILE_NAMES; }\n") +file(APPEND "${OUTPUT}" "const std::vector& getClFiles() { return CL_FILES; }\n") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1cdc2bc..fe663452 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,8 +1,41 @@ +if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(CMAKE_CXX_COMPILER g++-14) # Real GCC (not clang), needed for 128-bit floats and std::filesystem::path +endif() + +#Replace add_custom_command version.inc +execute_process( + 
COMMAND git describe --tags --long --dirty --always --match v/prpll/* + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) + +if (NOT GIT_VERSION) + set(GIT_VERSION "unknown") + message(WARNING "Cant take version, using default value") +endif() + +set(GIT_VERSION_QUOTED "\"${GIT_VERSION}\"") + +file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/version.inc" "${GIT_VERSION_QUOTED}\n") + +file(GLOB CL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/cl/*.cl") + +set(bundle_cpp "${CMAKE_CURRENT_BINARY_DIR}/bundle.cpp") + +add_custom_command( + OUTPUT ${bundle_cpp} + COMMAND ${CMAKE_COMMAND} -DCL_SOURCES="${CL_SOURCES}" -DOUTPUT=${bundle_cpp} -P ${CMAKE_SOURCE_DIR}/genbundle.cmake + DEPENDS ${CL_SOURCES} + COMMENT "Generation bundle.cpp" +) + +add_library(bundle STATIC ${bundle_cpp}) add_executable(prpll Trig.cpp Primes.cpp - bundle.cpp Proof.cpp log.cpp md5.cpp sha3.cpp AllocTrac.cpp FFTConfig.cpp Worktodo.cpp common.cpp main.cpp Gpu.cpp clwrap.cpp Task.cpp timeutil.cpp Args.cpp state.cpp Signal.cpp File.cpp @@ -21,39 +54,18 @@ add_executable(prpll TuneEntry.cpp fs.cpp version.inc - ) +) -if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - # Real GCC (not clang), needed for 128-bit floats and std::filesystem::path - set(CMAKE_CXX_COMPILER g++-14) -endif() +target_link_libraries(prpll PRIVATE bundle) find_package(OpenCL) -if (${OpenCL_FOUND}) +if (OpenCL_FOUND) # Use OpenCL library found by cmake - target_link_libraries(prpll OpenCL::OpenCL) + target_link_libraries(prpll PRIVATE OpenCL::OpenCL) else() # Pass -lOpenCL to the linker and hope for the best - target_link_libraries(prpll OpenCL) + target_link_libraries(prpll PRIVATE OpenCL) endif() -target_include_directories(prpll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - -add_custom_command( - OUTPUT version.inc - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - COMMAND git describe --tags --long --dirty --always --match ${CMAKE_SOURCE_DIR} | sed 's/.*/"&"/' > 
${CMAKE_CURRENT_BINARY_DIR}/version.inc - DEPENDS ${CMAKE_SOURCE_DIR} - ) - -file( - GLOB CL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cl/*.cl - ) - -add_custom_command( - OUTPUT bundle.cpp - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - COMMAND ${CMAKE_SOURCE_DIR}/genbundle.sh ${CL_SOURCES} > ${CMAKE_CURRENT_BINARY_DIR}/bundle.cpp - DEPENDS ${CL_SOURCES} - ) +target_include_directories(prpll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) \ No newline at end of file diff --git a/src/FFTConfig.cpp b/src/FFTConfig.cpp index acb595e0..d5236da9 100644 --- a/src/FFTConfig.cpp +++ b/src/FFTConfig.cpp @@ -151,15 +151,15 @@ FFTConfig::FFTConfig(const string& spec) { // assert(v.size() == 1 || v.size() == 3 || v.size() == 4 || v.size() == 5); if (v.size() == 1) { - *this = {FFTShape::multiSpec(spec).front(), 3, CARRY_AUTO}; + *this = {FFTShape::multiSpec(spec).front(), 3, static_cast(CARRY_AUTO)}; } if (v.size() == 3) { - *this = {FFTShape{v[0], v[1], v[2]}, 3, CARRY_AUTO}; + *this = {FFTShape{v[0], v[1], v[2]}, 3, static_cast(CARRY_AUTO)}; } else if (v.size() == 4) { - *this = {FFTShape{v[0], v[1], v[2]}, parseInt(v[3]), CARRY_AUTO}; + *this = {FFTShape{v[0], v[1], v[2]}, parseInt(v[3]), static_cast(CARRY_AUTO)}; } else if (v.size() == 5) { int c = parseInt(v[4]); assert(c == 0 || c == 1); - *this = {FFTShape{v[0], v[1], v[2]}, parseInt(v[3]), c == 0 ? CARRY_32 : CARRY_64}; + *this = {FFTShape{v[0], v[1], v[2]}, parseInt(v[3]), static_cast(c == 0 ? CARRY_32 : CARRY_64)}; } else { throw "FFT spec"; } @@ -175,7 +175,7 @@ FFTConfig::FFTConfig(FFTShape shape, u32 variant, u32 carry) : string FFTConfig::spec() const { string s = shape.spec() + ":" + to_string(variant); - return carry == CARRY_AUTO ? s : (s + (carry == CARRY_32 ? ":0" : ":1")); + return carry == static_cast(CARRY_AUTO) ? s : (s + (carry == CARRY_32 ? 
":0" : ":1")); } double FFTConfig::maxBpw() const { @@ -205,7 +205,10 @@ FFTConfig FFTConfig::bestFit(const Args& args, u32 E, const string& spec) { // Take the first FFT that can handle E for (const FFTShape& shape : FFTShape::allShapes()) { for (u32 v = 0; v < 4; ++v) { - if (FFTConfig fft{shape, v, CARRY_AUTO}; fft.maxExp() * args.fftOverdrive >= E) { return fft; } + if (FFTConfig fft{shape, v, static_cast(CARRY_AUTO)}; fft.maxExp() * args.fftOverdrive >= E) + { + return fft; + } } } diff --git a/src/File.cpp b/src/File.cpp index 3aaad2e0..2d54bda2 100644 --- a/src/File.cpp +++ b/src/File.cpp @@ -6,7 +6,7 @@ using namespace std; -File::File(const std::filesystem::__cxx11::path& path, const string& mode, bool throwOnError) +File::File(const std::filesystem::path& path, const string& mode, bool throwOnError) : readOnly{mode == "rb"}, name{path.string()} { assert(readOnly || throwOnError); diff --git a/src/File.h b/src/File.h index 6049246d..121cb0d7 100644 --- a/src/File.h +++ b/src/File.h @@ -8,26 +8,36 @@ #include #include #include -#include +//! Support for CrossPlatform +#if defined(_WIN32) || defined(__WIN32__) +#include //? windows +#elif defined(__APPLE__) +#include //? apple +#else +#include //? linux/unix +#endif +//! End #include #include #include #include -#if defined(_WIN32) || defined(__WIN32__) -#include -#endif - -#if defined(__APPLE__) -#include -#endif - #if defined(_DEFAULT_SOURCE) || defined(_BSD_SOURCE) #define HAS_SETLINEBUF 1 #else #define HAS_SETLINEBUF 0 #endif +//! Macros for __attribute__ compiller/scrossplatform support +#if defined(__GNUC__) || defined(__clang__) +#define FORMAT_PRINTF(fmt_idx, arg_idx) __attribute__((format(printf, fmt_idx, arg_idx))) +#define FORMAT_SCANF(fmt_idx, arg_idx) __attribute__((format(scanf, fmt_idx, arg_idx))) +#else +#define FORMAT_PRINTF(fmt_idx, arg_idx) +#define FORMAT_SCANF(fmt_idx, arg_idx) +#endif +//! 
End + namespace fs = std::filesystem; struct CRCError { @@ -48,7 +58,7 @@ class File { File(const fs::path &path, const string& mode, bool throwOnError); - bool readNoThrow(void* data, u32 nBytes) const { return fread(data, nBytes, 1, get()); } + bool readNoThrow(void *data, u32 nBytes) const { return fread(data, nBytes, 1, this->get()); } void read(void* data, u32 nBytes) const { if (!readNoThrow(data, nBytes)) { throw ReadError{name}; } @@ -121,19 +131,19 @@ class File { template void write(const T& x) const { write(&x, sizeof(T)); } - void write(const void* data, u32 nBytes) const { - if (!fwrite(data, nBytes, 1, get())) { throw WriteError{name}; } + void write(const void *data, u32 nBytes) const { + if (!fwrite(data, nBytes, 1, this->get())) { throw WriteError{name}; } } void seek(long offset, int whence = SEEK_SET) { - int ret = fseek(get(), offset, whence); + int ret = fseek(this->get(), offset, whence); if (ret) { throw ReadError{name}; } // throw(std::ios_base::failure(("fseek: "s + to_string(ret)).c_str())); } - void flush() { fflush(get()); } + void flush() { fflush(this->get()); } - int printf(const char *fmt, ...) const __attribute__((format(printf, 2, 3))) { + int printf(const char *fmt, ...) const FORMAT_PRINTF(2, 3) { va_list va; va_start(va, fmt); int ret = vfprintf(f, fmt, va); @@ -146,7 +156,7 @@ class File { return ret; } - int scanf(const char *fmt, ...) __attribute__((format(scanf, 2, 3))) { + int scanf(const char *fmt, ...) 
FORMAT_SCANF(2, 3) { va_list va; va_start(va, fmt); int ret = vfscanf(f, fmt, va); @@ -162,7 +172,7 @@ class File { FILE* get() const { return f; } long ftell() const { - long pos = ::ftell(get()); + long pos = ::ftell(this->get()); assert(pos >= 0); return pos; } @@ -185,7 +195,7 @@ class File { std::string readLine() { char buf[1024]; buf[0] = 0; - bool ok = fgets(buf, sizeof(buf), get()); + bool ok = fgets(buf, sizeof(buf), this->get()); if (!ok) { return ""; } // EOF or error string line = buf; if (line.empty() || line.back() != '\n') { @@ -239,7 +249,7 @@ class File { return data; } - u32 readUpTo(void* data, u32 nUpToBytes) { return fread(data, 1, nUpToBytes, get()); } + u32 readUpTo(void *data, u32 nUpToBytes) { return fread(data, 1, nUpToBytes, this->get()); } string readAll() { size_t sz = size(); diff --git a/src/Gpu.h b/src/Gpu.h index d232157b..55b11d82 100644 --- a/src/Gpu.h +++ b/src/Gpu.h @@ -21,7 +21,7 @@ #include struct PRPResult; -struct Task; +class Task; class Signal; class ProofSet; @@ -299,7 +299,6 @@ class Gpu { vector readCheck(); vector readData(); - u32 getFFTSize() { return N; } // return A^h * B diff --git a/src/common.h b/src/common.h index fbf5deda..fea2d2ec 100644 --- a/src/common.h +++ b/src/common.h @@ -11,7 +11,13 @@ using i32 = int32_t; using u32 = uint32_t; using i64 = int64_t; using u64 = uint64_t; +//! Support for CrossPlatform +#if defined(_WIN32) || defined(__WIN32__) +// using f128 = long double; #add only if need to be used for compilation. __float128 didnt support by MSVC +#else using f128 = __float128; +#endif +//! 
End static_assert(sizeof(u8) == 1, "size u8"); static_assert(sizeof(u32) == 4, "size u32"); diff --git a/src/main.cpp b/src/main.cpp index e1b00588..227aa628 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -18,6 +18,7 @@ #include #include +#include // #include from GCC-13 onwards namespace fs = std::filesystem; @@ -41,15 +42,20 @@ void gpuWorker(GpuCommon shared, Queue *q, i32 instance) { } } - #ifdef __MINGW32__ // for Windows extern int putenv(const char *); #endif int main(int argc, char **argv) { +//!MSVC version support +#ifdef _MSC_VER + _set_printf_count_output(1); +#endif #ifdef __MINGW32__ putenv("ROC_SIGNAL_POOL_SIZE=32"); +#elif defined(_WIN32) + _putenv_s("ROC_SIGNAL_POOL_SIZE", "32"); #else // Required to work around a ROCm bug when using multiple queues setenv("ROC_SIGNAL_POOL_SIZE", "32", 0); diff --git a/src/tune.cpp b/src/tune.cpp index 8eb1655f..eaac01fc 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -162,7 +162,7 @@ void Tune::ztune() { double bpw[4]; double A[4]; for (u32 variant = 0; variant < FFTConfig::N_VARIANT; ++variant) { - FFTConfig fft{shape, variant, CARRY_AUTO}; + FFTConfig fft{shape, variant, static_cast(CARRY_AUTO)}; std::tie(bpw[variant], A[variant]) = maxBpw(fft); } string s = "\""s + shape.spec() + "\""; @@ -178,7 +178,7 @@ void Tune::carryTune() { shared.args->flags["STATS"] = "1"; u32 prevSize = 0; for (FFTShape shape : FFTShape::multiSpec(shared.args->fftSpec)) { - FFTConfig fft{shape, 3, CARRY_AUTO}; + FFTConfig fft{shape, 3, static_cast(CARRY_AUTO)}; if (prevSize == fft.size()) { continue; } prevSize = fft.size(); @@ -283,7 +283,7 @@ void Tune::tune() { } for (auto carry : carryToTest) { - FFTConfig fft{shape, variant, carry}; + FFTConfig fft{shape, variant, static_cast(carry)}; if (minCost > 0 && !TuneEntry{minCost, fft}.willUpdate(results)) { // log("skipped %s %9u\n", fft.spec().c_str(), fft.maxExp());