Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions cmake/modules/FindKokkos.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# set CMAKE_BUILD_TYPE if not defined
if(NOT CMAKE_BUILD_TYPE)
set(default_build_type "RelWithDebInfo")
message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
set(
CMAKE_BUILD_TYPE
"${default_build_type}"
CACHE
STRING
"Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
FORCE
)
endif()

# find Kokkos as an already existing target
if(TARGET Kokkos::kokkos)
return()
endif()

# find Kokkos as installed
find_package(Kokkos CONFIG)
if(Kokkos_FOUND)
message(STATUS "Kokkos provided as installed: ${Kokkos_DIR} (version \"${Kokkos_VERSION}\")")

return()
endif()

# find Kokkos as an existing source directory
set(
CexaExperimental_KOKKOS_SOURCE_DIR
"${CMAKE_CURRENT_SOURCE_DIR}/../../../vendor/kokkos"
CACHE
PATH
"Path to the local source directory of Kokkos"
)
if(EXISTS "${CexaExperimental_KOKKOS_SOURCE_DIR}/CMakeLists.txt")
message(STATUS "Kokkos provided as a source directory: ${CexaExperimental_KOKKOS_SOURCE_DIR}")

add_subdirectory("${CexaExperimental_KOKKOS_SOURCE_DIR}" kokkos)
set(Kokkos_FOUND True)

return()
endif()

# download Kokkos from release and find it
message(STATUS "Kokkos downloaded: ${CexaExperimental_KOKKOS_SOURCE_DIR}")

include(FetchContent)

FetchContent_Declare(
kokkos
DOWNLOAD_EXTRACT_TIMESTAMP ON
URL https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.zip
SOURCE_DIR ${CexaExperimental_KOKKOS_SOURCE_DIR}
)
FetchContent_MakeAvailable(kokkos)
set(Kokkos_FOUND True)
18 changes: 18 additions & 0 deletions memInfo/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
cmake_minimum_required(VERSION 3.16)

project(cexa-experimental-meminfo LANGUAGES CXX)
set(CMAKE_BUILD_TYPE "RelWithDebInfo")

if (NOT ${Kokkos_DIR} STREQUAL "")
add_subdirectory(${Kokkos_DIR})
include_directories(${Kokkos_INCLUDE_DIRS_RET})
else()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/../cmake/modules")
find_package(Kokkos REQUIRED)
endif()

include(CTest)

add_subdirectory(src)
add_subdirectory(unit_test)

9 changes: 9 additions & 0 deletions memInfo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
### memInfo
This is a wrapper for the device _memGetInfo_ function, but it's also available for unified memory space and host space.

### Usage
```
Kokkos::Experimental::memInfo(&free, &total);
```
- `total`: Amount of RAM on the system (HBM/DRAM)
- `free`: Available memory
3 changes: 3 additions & 0 deletions memInfo/src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
add_library(memInfo INTERFACE)
target_include_directories(memInfo INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(memInfo INTERFACE Kokkos::kokkos)
88 changes: 88 additions & 0 deletions memInfo/src/cexa_MemInfo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#ifndef KOKKOS_MEMINFO_HPP
#define KOKKOS_MEMINFO_HPP

#ifdef _WIN32
#include <windows.h>
#else
#include <unixMemInfo.hpp>
#endif

#include <cstddef>
#include <sstream>
#include <fstream>

#include <Kokkos_Core.hpp>

namespace Kokkos {
namespace Experimental {

template <typename Space = Kokkos::DefaultExecutionSpace>
void MemGetInfo(size_t* free, size_t* total) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not fond of this API. Can we return the value instead of playing with addresses (Is it host or device pointer ? etc. )

We could also a simple getFreeMemSize or something like this to return only one value.

Copy link
Member

@science-enthusiast science-enthusiast Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A boolean flag could also be either passed as an argument or returned in order to be sure whether the total and free memory available were obtained successfully. Right now there are unsuccessful code paths and there is no way to know that.

cudaMemGetInfo returns a ​cudaError_t.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A more modern C++ will be to use std::optional or expected the day it will become available.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that std::optional or std::expected based approach is a good one. std::expected being even more suitable as the return type of a function.

However, I am feeling that it is better to go with the interface of cudaMemGetInfo. There is a hipMemGetInfo also with the same interface. There will be less cognitive load on the user.

using MemorySpace = typename Space::memory_space;
MemGetInfo<MemorySpace>(free, total);
}

// Single node memory info
#ifdef _WIN32
template <>
void MemGetInfo<Kokkos::HostSpace>(size_t* free, size_t* total) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you happen to know if it works from inside a cpuset? Very often, in hpc, scheduler like slurm will put your process in a cpuset with limited resources.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slurm and other schedulers use cgroups to limit jobs, I've set up a way to retrieve these values for cgroups v1.

MEMORYSTATUSEX statex;
statex.dwLength = sizeof(statex);
if (GlobalMemoryStatusEx(&statex) != 0) {
*free = statex.ullAvailPhys;
*total = statex.ullTotalPhys;
}
return;
}
#endif

#if defined(KOKKOS_ENABLE_CUDA)
template <>
void MemGetInfo<Kokkos::CudaSpace>(size_t* free, size_t* total) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemGetInfo(free, total));
}
template <>
void MemGetInfo<Kokkos::CudaUVMSpace>(size_t* free, size_t* total) {
MemGetInfo<Kokkos::HostSpace>(free, total);
}
template <>
void MemGetInfo<Kokkos::CudaHostPinnedSpace>(size_t* free, size_t* total) {
MemGetInfo<Kokkos::HostSpace>(free, total);
}
#endif

#if defined(KOKKOS_ENABLE_HIP)
template <>
void MemGetInfo<Kokkos::HIPSpace>(size_t* free, size_t* total) {
KOKKOS_IMPL_HIP_SAFE_CALL(hipMemGetInfo(free, total));
}
template <>
void MemGetInfo<Kokkos::HIPManagedSpace>(size_t* free, size_t* total) {
MemGetInfo<Kokkos::HostSpace>(free, total);
}
template <>
void MemGetInfo<Kokkos::HIPHostPinnedSpace>(size_t* free, size_t* total) {
MemGetInfo<Kokkos::HostSpace>(free, total);
}
#endif

#if defined(KOKKOS_ENABLE_SYCL)
template <>
void MemGetInfo<Kokkos::SYCLDeviceUSMSpace>(size_t* free, size_t* total) {
std::vector<sycl::device> devices = Kokkos::Impl::get_sycl_devices();
for (auto& dev : devices) {
if (dev.is_gpu()) {
*total += dev.get_info<sycl::info::device::global_mem_size>();
// https://github.com/triSYCL/sycl/blob/sycl/unified/master/sycl/doc/extensions/supported/sycl_ext_intel_device_info.md#free-global-memory
if (dev.has(sycl::aspect::ext_intel_free_memory)) {
*free += dev.get_info<sycl::ext::intel::info::device::free_memory>();
}
}
}
}
#endif

} // namespace Experimental
} // namespace Kokkos

#endif // KOKKOS_MEMINFO_HPP
194 changes: 194 additions & 0 deletions memInfo/src/unixMemInfo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
#ifndef KOKKOS_UNIX_MEMINFO_HPP
#define KOKKOS_UNIX_MEMINFO_HPP

#include <unistd.h>

#include <cstddef>
#include <sstream>
#include <fstream>

#include <Kokkos_Core.hpp>

namespace Kokkos {
namespace Experimental {

namespace {
constexpr size_t NO_LIMIT = 1ull << 50; // No limit (like PAGE_COUNT_MAX)
constexpr int OVERCOMMIT_DISABLED = 2;
// Memory info keys
constexpr char MEM_FREE_KEY[] = "MemFree:";
constexpr char MEM_TOTAL_KEY[] = "MemTotal:";
constexpr char COMMITTED_AS_KEY[] = "Committed_AS:";
constexpr char COMMIT_LIMIT_KEY[] = "CommitLimit:";
// Cgroup v1 memory info
constexpr char CGROUP_PROCS[] = "cgroup.procs";
constexpr char MEM_LIMIT_BYTES[] = "memory.limit_in_bytes";
constexpr char MEM_USAGE_BYTES[] = "memory.usage_in_bytes";
// Paths
constexpr char MEMINFO_PATH[] = "/proc/meminfo";
constexpr char OVERCOMMIT_PATH[] = "/proc/sys/vm/overcommit_memory";
constexpr char CGROUP_MEM_PATH[] = "/sys/fs/cgroup/memory";
}

template <typename Space>
void MemGetInfo(size_t* free, size_t* total);

// On some systems, overcommit is disabled, and the kernel does not allow
// memory allocation beyond the commit limit. This means that allocations
// that touch only a small amount of memory are still counted at their full size.
// man proc_sys_vm
bool is_overcommit_disabled() {
std::ifstream overcommit_file(OVERCOMMIT_PATH);
int overcommit_value = 0;

if (overcommit_file.is_open()) {
overcommit_file >> overcommit_value;
overcommit_value = (overcommit_file.fail()) ? 0 : overcommit_value;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check for overcommit_file.fail() before reading from it (overcommit_file >> overcommit_value;). Otherwise it looks like undefined behavior.

overcommit_value is anyway initialized to 0. Thus, the following logic could be enough:

if (overcommit_file.is_open()) {
if (!overcommit_file.fail()) { overcommit_file >> overcommit_value; }
return (overcommit_value == OVERCOMMIT_DISABLED);
}

return (overcommit_value == OVERCOMMIT_DISABLED);
}
return false;
}

size_t get_meminfo_value(const char* key) {
std::ifstream meminfo(MEMINFO_PATH);
size_t value = 0;
std::string line;

if (meminfo.is_open()) {
while (std::getline(meminfo, line)) {
if (line.find(key) != std::string::npos) {
std::istringstream iss(line);
iss.ignore(256, ':');
iss >> value;
value = (iss.fail()) ? 0 : value * 1024;
break;
}
}
}
return value;
}

// Extract a value from a cgroup file
size_t get_cgroup_value(const char* path) {
std::ifstream cgroup_file(path);
size_t value = 0;

if (cgroup_file.is_open()) {
cgroup_file >> value;
value = (cgroup_file.fail()) ? 0 : value;
}
return value;
}

// Check if a process is in the cgroup.procs file
bool is_pid_in_cgroup_procs(const char* cgroup_procs_path, const pid_t pid) {
std::ifstream cgroup_procs(cgroup_procs_path);
pid_t proc_id = 0;

if (cgroup_procs.is_open()) {
while (cgroup_procs >> proc_id) {
if (proc_id == pid) {
return true;
}
}
}
return false;
}

// Find out if memory controller is enabled (cgroup v1)
// Check in /proc/<pid>/cgroup
bool is_cgroup_mem_control_enabled() {
const pid_t pid = getpid();
std::ifstream cgroup_memory_limit("/proc/" + std::to_string(pid) + "/cgroup");
std::string line;

if (cgroup_memory_limit.is_open()) {
while (std::getline(cgroup_memory_limit, line)) {
if (line.find("memory") != std::string::npos) {
return true;
}
}
}
return false;
}

// Find the cgroup memory path for the current process
// Verify if the process is in the cgroup.procs file
std::string find_cgroup_memory_path() {
const pid_t pid = getpid();
std::ifstream cgroup_memory_limit("/proc/" + std::to_string(pid) + "/cgroup");
std::string cgroup_path;

if (cgroup_memory_limit.is_open()) {
std::string line;
while (std::getline(cgroup_memory_limit, line)) {
if (line.find(":memory:") != std::string::npos) {
const size_t pos = line.find_last_of(':');
if (pos != std::string::npos) {
std::string cgroup_path = line.substr(pos + 1);
if (is_pid_in_cgroup_procs((CGROUP_MEM_PATH + cgroup_path + "/" + CGROUP_PROCS).c_str(), pid)) {
return CGROUP_MEM_PATH + cgroup_path;
}
const size_t last_slash = cgroup_path.find_last_of('/');
if (last_slash != std::string::npos) {
std::string parent_path = cgroup_path.substr(0, last_slash);
if (is_pid_in_cgroup_procs((CGROUP_MEM_PATH + parent_path + "/" + CGROUP_PROCS).c_str(), pid)) {
return CGROUP_MEM_PATH + parent_path;
}
}
}
}
}
}

// Fallback to the default path
return CGROUP_MEM_PATH + cgroup_path;
}

// Single node memory info
template <>
void MemGetInfo<Kokkos::HostSpace>(size_t* free, size_t* total) {
static bool overcommit_disabled = is_overcommit_disabled();
bool cgroup_mem_enable = is_cgroup_mem_control_enabled();

// Cgroup memory info
if (cgroup_mem_enable) {
std::string cgroup_mem_path = find_cgroup_memory_path();
const size_t mem_limit = get_cgroup_value((cgroup_mem_path + "/" + MEM_LIMIT_BYTES).c_str());
if (mem_limit == 0 || mem_limit > NO_LIMIT) {
if (overcommit_disabled) {
*total = get_meminfo_value(COMMIT_LIMIT_KEY);
} else {
*total = get_meminfo_value(MEM_TOTAL_KEY);
}
} else {
*total = mem_limit;
}
const size_t mem_usage = get_cgroup_value((cgroup_mem_path + "/" + MEM_USAGE_BYTES).c_str());
if (mem_usage == 0 || mem_usage > NO_LIMIT ) {
if (overcommit_disabled) {
*free = *total - get_meminfo_value(COMMITTED_AS_KEY);
} else {
*free = get_meminfo_value(MEM_FREE_KEY);
}
} else {
*free = *total - mem_usage;
}
return;
}

// System memory info
if (overcommit_disabled) {
*total = get_meminfo_value(COMMIT_LIMIT_KEY);
*free = *total - get_meminfo_value(COMMITTED_AS_KEY);
} else {
*total = get_meminfo_value(MEM_TOTAL_KEY);
*free = get_meminfo_value(MEM_FREE_KEY);
}
return;
}

} // namespace Experimental
} // namespace Kokkos

#endif // KOKKOS_UNIX_MEMINFO_HPP
17 changes: 17 additions & 0 deletions memInfo/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

include(FetchContent)

FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG 6910c9d9165801d8827d628cb72eb7ea9dd538c5
DOWNLOAD_EXTRACT_TIMESTAMP true)

FetchContent_MakeAvailable(googletest)

enable_testing()
add_executable(MemInfoTest TestMemInfo.cpp)
target_link_libraries(MemInfoTest GTest::gtest_main Kokkos::kokkos memInfo)

include(GoogleTest)
gtest_discover_tests(MemInfoTest)
Loading