-
Notifications
You must be signed in to change notification settings - Fork 0
Add runtime memory monitoring functions #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
31296c8
8fed5f7
6a8577a
443dbcc
5ce64a9
7678738
45f0022
ad51978
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| # set CMAKE_BUILD_TYPE if not defined | ||
| if(NOT CMAKE_BUILD_TYPE) | ||
| set(default_build_type "RelWithDebInfo") | ||
| message(STATUS "Setting build type to '${default_build_type}' as none was specified.") | ||
| set( | ||
| CMAKE_BUILD_TYPE | ||
| "${default_build_type}" | ||
| CACHE | ||
| STRING | ||
| "Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel." | ||
| FORCE | ||
| ) | ||
| endif() | ||
|
|
||
| # find Kokkos as an already existing target | ||
| if(TARGET Kokkos::kokkos) | ||
| return() | ||
| endif() | ||
|
|
||
| # find Kokkos as installed | ||
| find_package(Kokkos CONFIG) | ||
| if(Kokkos_FOUND) | ||
| message(STATUS "Kokkos provided as installed: ${Kokkos_DIR} (version \"${Kokkos_VERSION}\")") | ||
|
|
||
| return() | ||
| endif() | ||
|
|
||
| # find Kokkos as an existing source directory | ||
| set( | ||
| CexaExperimental_KOKKOS_SOURCE_DIR | ||
| "${CMAKE_CURRENT_SOURCE_DIR}/../../../vendor/kokkos" | ||
| CACHE | ||
| PATH | ||
| "Path to the local source directory of Kokkos" | ||
| ) | ||
| if(EXISTS "${CexaExperimental_KOKKOS_SOURCE_DIR}/CMakeLists.txt") | ||
| message(STATUS "Kokkos provided as a source directory: ${CexaExperimental_KOKKOS_SOURCE_DIR}") | ||
|
|
||
| add_subdirectory("${CexaExperimental_KOKKOS_SOURCE_DIR}" kokkos) | ||
| set(Kokkos_FOUND True) | ||
|
|
||
| return() | ||
| endif() | ||
|
|
||
| # download Kokkos from release and find it | ||
| message(STATUS "Kokkos downloaded: ${CexaExperimental_KOKKOS_SOURCE_DIR}") | ||
|
|
||
| include(FetchContent) | ||
|
|
||
| FetchContent_Declare( | ||
| kokkos | ||
| DOWNLOAD_EXTRACT_TIMESTAMP ON | ||
| URL https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.zip | ||
| SOURCE_DIR ${CexaExperimental_KOKKOS_SOURCE_DIR} | ||
| ) | ||
| FetchContent_MakeAvailable(kokkos) | ||
| set(Kokkos_FOUND True) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| cmake_minimum_required(VERSION 3.16) | ||
|
|
||
| project(cexa-experimental-meminfo LANGUAGES CXX) | ||
| set(CMAKE_BUILD_TYPE "RelWithDebInfo") | ||
|
|
||
| if (NOT ${Kokkos_DIR} STREQUAL "") | ||
| add_subdirectory(${Kokkos_DIR}) | ||
| include_directories(${Kokkos_INCLUDE_DIRS_RET}) | ||
| else() | ||
| list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/../cmake/modules") | ||
| find_package(Kokkos REQUIRED) | ||
| endif() | ||
|
|
||
| include(CTest) | ||
|
|
||
| add_subdirectory(src) | ||
| add_subdirectory(unit_test) | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| ### memInfo | ||
| This is a wrapper for the device _memGetInfo_ function, but it's also available for unified memory space and host space. | ||
|
|
||
| ### Usage | ||
| ``` | ||
| Kokkos::Experimental::memInfo(&free, &total); | ||
| ``` | ||
| - `total`: Amount of RAM on the system (HBM/DRAM) | ||
| - `free`: Available memory |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| add_library(memInfo INTERFACE) | ||
| target_include_directories(memInfo INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) | ||
| target_link_libraries(memInfo INTERFACE Kokkos::kokkos) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| #ifndef KOKKOS_MEMINFO_HPP | ||
| #define KOKKOS_MEMINFO_HPP | ||
|
|
||
| #ifdef _WIN32 | ||
| #include <windows.h> | ||
| #else | ||
| #include <unixMemInfo.hpp> | ||
| #endif | ||
|
|
||
| #include <cstddef> | ||
| #include <sstream> | ||
| #include <fstream> | ||
|
|
||
| #include <Kokkos_Core.hpp> | ||
|
|
||
| namespace Kokkos { | ||
| namespace Experimental { | ||
|
|
||
| template <typename Space = Kokkos::DefaultExecutionSpace> | ||
| void MemGetInfo(size_t* free, size_t* total) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not fond of this API. Can we return the value instead of playing with addresses (Is it host or device pointer ? etc. ) We could also a simple
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A boolean flag could also be either passed as an argument or returned in order to be sure whether the total and free memory available were obtained successfully. Right now there are unsuccessful code paths and there is no way to know that. cudaMemGetInfo returns a cudaError_t.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A more modern C++ will be to use
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that However, I am feeling that it is better to go with the interface of |
||
| using MemorySpace = typename Space::memory_space; | ||
science-enthusiast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| MemGetInfo<MemorySpace>(free, total); | ||
| } | ||
|
|
||
| // Single node memory info | ||
| #ifdef _WIN32 | ||
| template <> | ||
| void MemGetInfo<Kokkos::HostSpace>(size_t* free, size_t* total) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you happen to know if it works from inside a cpuset? Very often, in hpc, scheduler like slurm will put your process in a cpuset with limited resources.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Slurm and other schedulers use cgroups to limit jobs, I've set up a way to retrieve these values for cgroups v1. |
||
| MEMORYSTATUSEX statex; | ||
| statex.dwLength = sizeof(statex); | ||
| if (GlobalMemoryStatusEx(&statex) != 0) { | ||
| *free = statex.ullAvailPhys; | ||
| *total = statex.ullTotalPhys; | ||
| } | ||
science-enthusiast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return; | ||
| } | ||
| #endif | ||
|
|
||
| #if defined(KOKKOS_ENABLE_CUDA) | ||
| template <> | ||
| void MemGetInfo<Kokkos::CudaSpace>(size_t* free, size_t* total) { | ||
| KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemGetInfo(free, total)); | ||
| } | ||
| template <> | ||
| void MemGetInfo<Kokkos::CudaUVMSpace>(size_t* free, size_t* total) { | ||
| MemGetInfo<Kokkos::HostSpace>(free, total); | ||
| } | ||
| template <> | ||
| void MemGetInfo<Kokkos::CudaHostPinnedSpace>(size_t* free, size_t* total) { | ||
| MemGetInfo<Kokkos::HostSpace>(free, total); | ||
| } | ||
| #endif | ||
|
|
||
| #if defined(KOKKOS_ENABLE_HIP) | ||
| template <> | ||
| void MemGetInfo<Kokkos::HIPSpace>(size_t* free, size_t* total) { | ||
| KOKKOS_IMPL_HIP_SAFE_CALL(hipMemGetInfo(free, total)); | ||
| } | ||
| template <> | ||
| void MemGetInfo<Kokkos::HIPManagedSpace>(size_t* free, size_t* total) { | ||
| MemGetInfo<Kokkos::HostSpace>(free, total); | ||
| } | ||
| template <> | ||
| void MemGetInfo<Kokkos::HIPHostPinnedSpace>(size_t* free, size_t* total) { | ||
| MemGetInfo<Kokkos::HostSpace>(free, total); | ||
| } | ||
| #endif | ||
|
|
||
| #if defined(KOKKOS_ENABLE_SYCL) | ||
| template <> | ||
| void MemGetInfo<Kokkos::SYCLDeviceUSMSpace>(size_t* free, size_t* total) { | ||
| std::vector<sycl::device> devices = Kokkos::Impl::get_sycl_devices(); | ||
| for (auto& dev : devices) { | ||
| if (dev.is_gpu()) { | ||
| *total += dev.get_info<sycl::info::device::global_mem_size>(); | ||
| // https://github.com/triSYCL/sycl/blob/sycl/unified/master/sycl/doc/extensions/supported/sycl_ext_intel_device_info.md#free-global-memory | ||
| if (dev.has(sycl::aspect::ext_intel_free_memory)) { | ||
| *free += dev.get_info<sycl::ext::intel::info::device::free_memory>(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| } // namespace Experimental | ||
| } // namespace Kokkos | ||
|
|
||
| #endif // KOKKOS_MEMINFO_HPP | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,194 @@ | ||
| #ifndef KOKKOS_UNIX_MEMINFO_HPP | ||
| #define KOKKOS_UNIX_MEMINFO_HPP | ||
|
|
||
| #include <unistd.h> | ||
|
|
||
| #include <cstddef> | ||
| #include <sstream> | ||
| #include <fstream> | ||
|
|
||
| #include <Kokkos_Core.hpp> | ||
|
|
||
| namespace Kokkos { | ||
| namespace Experimental { | ||
|
|
||
| namespace { | ||
| constexpr size_t NO_LIMIT = 1ull << 50; // No limit (like PAGE_COUNT_MAX) | ||
| constexpr int OVERCOMMIT_DISABLED = 2; | ||
| // Memory info keys | ||
| constexpr char MEM_FREE_KEY[] = "MemFree:"; | ||
| constexpr char MEM_TOTAL_KEY[] = "MemTotal:"; | ||
| constexpr char COMMITTED_AS_KEY[] = "Committed_AS:"; | ||
| constexpr char COMMIT_LIMIT_KEY[] = "CommitLimit:"; | ||
| // Cgroup v1 memory info | ||
| constexpr char CGROUP_PROCS[] = "cgroup.procs"; | ||
| constexpr char MEM_LIMIT_BYTES[] = "memory.limit_in_bytes"; | ||
| constexpr char MEM_USAGE_BYTES[] = "memory.usage_in_bytes"; | ||
| // Paths | ||
| constexpr char MEMINFO_PATH[] = "/proc/meminfo"; | ||
| constexpr char OVERCOMMIT_PATH[] = "/proc/sys/vm/overcommit_memory"; | ||
| constexpr char CGROUP_MEM_PATH[] = "/sys/fs/cgroup/memory"; | ||
| } | ||
|
|
||
| template <typename Space> | ||
| void MemGetInfo(size_t* free, size_t* total); | ||
|
|
||
| // On some systems, overcommit is disabled, and the kernel does not allow | ||
| // memory allocation beyond the commit limit. This means that allocations | ||
| // that touch only a small amount of memory are still counted at their full size. | ||
| // man proc_sys_vm | ||
| bool is_overcommit_disabled() { | ||
| std::ifstream overcommit_file(OVERCOMMIT_PATH); | ||
| int overcommit_value = 0; | ||
|
|
||
| if (overcommit_file.is_open()) { | ||
| overcommit_file >> overcommit_value; | ||
| overcommit_value = (overcommit_file.fail()) ? 0 : overcommit_value; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check for
if (overcommit_file.is_open()) { |
||
| return (overcommit_value == OVERCOMMIT_DISABLED); | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| size_t get_meminfo_value(const char* key) { | ||
| std::ifstream meminfo(MEMINFO_PATH); | ||
| size_t value = 0; | ||
| std::string line; | ||
|
|
||
| if (meminfo.is_open()) { | ||
| while (std::getline(meminfo, line)) { | ||
| if (line.find(key) != std::string::npos) { | ||
| std::istringstream iss(line); | ||
| iss.ignore(256, ':'); | ||
| iss >> value; | ||
| value = (iss.fail()) ? 0 : value * 1024; | ||
| break; | ||
| } | ||
| } | ||
| } | ||
| return value; | ||
| } | ||
|
|
||
| // Extract a value from a cgroup file | ||
| size_t get_cgroup_value(const char* path) { | ||
| std::ifstream cgroup_file(path); | ||
| size_t value = 0; | ||
|
|
||
| if (cgroup_file.is_open()) { | ||
| cgroup_file >> value; | ||
| value = (cgroup_file.fail()) ? 0 : value; | ||
| } | ||
| return value; | ||
| } | ||
|
|
||
| // Check if a process is in the cgroup.procs file | ||
| bool is_pid_in_cgroup_procs(const char* cgroup_procs_path, const pid_t pid) { | ||
| std::ifstream cgroup_procs(cgroup_procs_path); | ||
| pid_t proc_id = 0; | ||
|
|
||
| if (cgroup_procs.is_open()) { | ||
| while (cgroup_procs >> proc_id) { | ||
| if (proc_id == pid) { | ||
| return true; | ||
| } | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| // Find out if memory controller is enabled (cgroup v1) | ||
| // Check in /proc/<pid>/cgroup | ||
| bool is_cgroup_mem_control_enabled() { | ||
| const pid_t pid = getpid(); | ||
| std::ifstream cgroup_memory_limit("/proc/" + std::to_string(pid) + "/cgroup"); | ||
| std::string line; | ||
|
|
||
| if (cgroup_memory_limit.is_open()) { | ||
| while (std::getline(cgroup_memory_limit, line)) { | ||
| if (line.find("memory") != std::string::npos) { | ||
| return true; | ||
| } | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| // Find the cgroup memory path for the current process | ||
| // Verify if the process is in the cgroup.procs file | ||
| std::string find_cgroup_memory_path() { | ||
| const pid_t pid = getpid(); | ||
| std::ifstream cgroup_memory_limit("/proc/" + std::to_string(pid) + "/cgroup"); | ||
| std::string cgroup_path; | ||
|
|
||
| if (cgroup_memory_limit.is_open()) { | ||
| std::string line; | ||
| while (std::getline(cgroup_memory_limit, line)) { | ||
| if (line.find(":memory:") != std::string::npos) { | ||
| const size_t pos = line.find_last_of(':'); | ||
| if (pos != std::string::npos) { | ||
| std::string cgroup_path = line.substr(pos + 1); | ||
| if (is_pid_in_cgroup_procs((CGROUP_MEM_PATH + cgroup_path + "/" + CGROUP_PROCS).c_str(), pid)) { | ||
| return CGROUP_MEM_PATH + cgroup_path; | ||
| } | ||
| const size_t last_slash = cgroup_path.find_last_of('/'); | ||
| if (last_slash != std::string::npos) { | ||
| std::string parent_path = cgroup_path.substr(0, last_slash); | ||
| if (is_pid_in_cgroup_procs((CGROUP_MEM_PATH + parent_path + "/" + CGROUP_PROCS).c_str(), pid)) { | ||
| return CGROUP_MEM_PATH + parent_path; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Fallback to the default path | ||
| return CGROUP_MEM_PATH + cgroup_path; | ||
| } | ||
|
|
||
| // Single node memory info | ||
| template <> | ||
| void MemGetInfo<Kokkos::HostSpace>(size_t* free, size_t* total) { | ||
| static bool overcommit_disabled = is_overcommit_disabled(); | ||
| bool cgroup_mem_enable = is_cgroup_mem_control_enabled(); | ||
|
|
||
| // Cgroup memory info | ||
| if (cgroup_mem_enable) { | ||
| std::string cgroup_mem_path = find_cgroup_memory_path(); | ||
| const size_t mem_limit = get_cgroup_value((cgroup_mem_path + "/" + MEM_LIMIT_BYTES).c_str()); | ||
| if (mem_limit == 0 || mem_limit > NO_LIMIT) { | ||
| if (overcommit_disabled) { | ||
| *total = get_meminfo_value(COMMIT_LIMIT_KEY); | ||
| } else { | ||
| *total = get_meminfo_value(MEM_TOTAL_KEY); | ||
| } | ||
| } else { | ||
| *total = mem_limit; | ||
| } | ||
| const size_t mem_usage = get_cgroup_value((cgroup_mem_path + "/" + MEM_USAGE_BYTES).c_str()); | ||
| if (mem_usage == 0 || mem_usage > NO_LIMIT ) { | ||
| if (overcommit_disabled) { | ||
| *free = *total - get_meminfo_value(COMMITTED_AS_KEY); | ||
| } else { | ||
| *free = get_meminfo_value(MEM_FREE_KEY); | ||
| } | ||
| } else { | ||
| *free = *total - mem_usage; | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| // System memory info | ||
| if (overcommit_disabled) { | ||
| *total = get_meminfo_value(COMMIT_LIMIT_KEY); | ||
| *free = *total - get_meminfo_value(COMMITTED_AS_KEY); | ||
| } else { | ||
| *total = get_meminfo_value(MEM_TOTAL_KEY); | ||
| *free = get_meminfo_value(MEM_FREE_KEY); | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| } // namespace Experimental | ||
| } // namespace Kokkos | ||
|
|
||
| #endif // KOKKOS_UNIX_MEMINFO_HPP | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
|
|
||
| include(FetchContent) | ||
|
|
||
| FetchContent_Declare( | ||
| googletest | ||
| GIT_REPOSITORY https://github.com/google/googletest.git | ||
| GIT_TAG 6910c9d9165801d8827d628cb72eb7ea9dd538c5 | ||
| DOWNLOAD_EXTRACT_TIMESTAMP true) | ||
|
|
||
| FetchContent_MakeAvailable(googletest) | ||
|
|
||
| enable_testing() | ||
| add_executable(MemInfoTest TestMemInfo.cpp) | ||
science-enthusiast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| target_link_libraries(MemInfoTest GTest::gtest_main Kokkos::kokkos memInfo) | ||
science-enthusiast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| include(GoogleTest) | ||
| gtest_discover_tests(MemInfoTest) | ||
Uh oh!
There was an error while loading. Please reload this page.