Skip to content

Commit 7719fc5

Browse files
committed
First pass at cuda.core.system
1 parent 083315c commit 7719fc5

File tree

15 files changed

+1372
-134
lines changed

15 files changed

+1372
-134
lines changed

cuda_bindings/cuda/bindings/_nvml.pyx

Lines changed: 407 additions & 7 deletions
Large diffs are not rendered by default.

cuda_core/cuda/core/__init__.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
finally:
2929
del bindings, importlib, subdir, cuda_major, cuda_minor
3030

31-
from cuda.core import utils # noqa: E402
31+
from cuda.core import system, utils # noqa: E402
3232
from cuda.core._device import Device # noqa: E402
3333
from cuda.core._event import Event, EventOptions # noqa: E402
3434
from cuda.core._graph import ( # noqa: E402
@@ -62,8 +62,3 @@
6262
from cuda.core._module import Kernel, ObjectCode # noqa: E402
6363
from cuda.core._program import Program, ProgramOptions # noqa: E402
6464
from cuda.core._stream import Stream, StreamOptions # noqa: E402
65-
from cuda.core._system import System # noqa: E402
66-
67-
system = System()
68-
__import__("sys").modules[__spec__.name + ".system"] = system
69-
del System

cuda_core/cuda/core/_system.py

Lines changed: 0 additions & 114 deletions
This file was deleted.

cuda_core/cuda/core/experimental/__init__.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def _warn_deprecated():
3838
_warn_deprecated()
3939

4040

41-
from cuda.core import utils # noqa: E402
41+
from cuda.core import system, utils # noqa: E402
4242

4343
# Make utils accessible as a submodule for backward compatibility
4444
__import__("sys").modules[__spec__.name + ".utils"] = utils
@@ -72,9 +72,4 @@ def _warn_deprecated():
7272
)
7373
from cuda.core._module import Kernel, ObjectCode # noqa: E402
7474
from cuda.core._program import Program, ProgramOptions # noqa: E402
75-
from cuda.core._stream import Stream, StreamOptions # noqa: E402
76-
from cuda.core._system import System # noqa: E402
77-
78-
system = System()
79-
__import__("sys").modules[__spec__.name + ".system"] = system
80-
del System
75+
from cuda.core._stream import Stream, StreamOptions # noqa: E402
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# ruff: noqa: F403, F405
6+
7+
8+
__all__ = [
9+
"get_driver_version",
10+
"get_driver_version_full",
11+
"get_gpu_driver_version",
12+
"get_num_devices",
13+
"get_process_name",
14+
]
15+
16+
17+
import cuda.bindings
18+
19+
from .system import *
20+
21+
# We need both the existence of cuda.bindings._nvml and a sufficient version
22+
# with the APIs implemented as we need them.
23+
24+
# Parse at most the first three dot-separated components and ignore any that
# are not purely numeric, so suffixed versions (e.g. "13.1.2.dev0+gabc" or
# "12.9.6rc1") do not make int() raise ValueError at import time.
_BINDINGS_VERSION = tuple(int(x) for x in cuda.bindings.__version__.split(".")[:3] if x.isdigit())
25+
26+
# True only for cuda.bindings >= 13.1.2, or >= 12.9.6 within the 12.x series.
# (A trailing "or True" previously forced this on regardless of version.)
_HAS_WORKING_NVML = _BINDINGS_VERSION >= (13, 1, 2) or (_BINDINGS_VERSION[0] == 12 and _BINDINGS_VERSION[1:3] >= (9, 6))
27+
28+
if _HAS_WORKING_NVML:
29+
from cuda.bindings import _nvml
30+
31+
from ._nvml_context import initialize
32+
from .device import Device, DeviceArchitecture
33+
34+
initialize()
35+
36+
__all__.extend(["Device", "DeviceArchitecture"])
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import os
6+
import threading
7+
8+
from cuda.bindings import _nvml as nvml
9+
10+
11+
# Initialization states stored in the module-level _NVML_STATE:
#   UNINITIALIZED              - starting value, before initialize() has run
#   INITIALIZED                - set by initialize() after nvml.init_v2() succeeds
#   DISABLED_LIBRARY_NOT_FOUND - set by initialize() when nvml.init_v2() raises
#                                LibraryNotFoundError, DriverNotLoadedError or
#                                UnknownError; later initialize() calls return early
ctypedef enum _NVMLState:
12+
UNINITIALIZED = 0
13+
INITIALIZED = 1
14+
DISABLED_LIBRARY_NOT_FOUND = 2
15+
16+
17+
# Initialisation must occur per-process, so an initialised state is a
18+
# (state, pid) pair
19+
_NVML_STATE = _NVMLState.UNINITIALIZED
20+
# """Current initialization state"""
21+
22+
_NVML_OWNER_PID = 0
23+
# """PID of process that successfully called nvml.init_v2"""
24+
25+
26+
_lock = threading.Lock()
27+
28+
29+
def initialize() -> None:
    """Initialize NVML for the current process, idempotently.

    The call is a no-op when NVML was already initialized by this process,
    or when an earlier attempt marked NVML as unavailable. When the recorded
    owner PID differs from the current PID, or no attempt has been made yet,
    ``nvml.init_v2()`` is called.

    Notes
    -----
    Modifies global variables _NVML_STATE and _NVML_OWNER_PID.

    Raises
    ------
    RuntimeError
        If _NVML_STATE holds a value that is not one of the known states.
    """
    global _NVML_STATE, _NVML_OWNER_PID

    with _lock:
        # A previous attempt failed; do not retry.
        if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
            return

        this_pid = os.getpid()
        if _NVML_STATE == _NVMLState.INITIALIZED:
            # Already initialized by this very process: nothing to do.
            if this_pid == _NVML_OWNER_PID:
                return
        elif _NVML_STATE != _NVMLState.UNINITIALIZED:
            raise RuntimeError(f"Unhandled initialisation state ({_NVML_STATE=}, {_NVML_OWNER_PID=})")

        # Either never initialized, or initialized under a different PID.
        try:
            nvml.init_v2()
        except (nvml.LibraryNotFoundError, nvml.DriverNotLoadedError, nvml.UnknownError):
            _NVML_STATE = _NVMLState.DISABLED_LIBRARY_NOT_FOUND
            return

        # Record success together with the PID that owns this initialization.
        _NVML_STATE = _NVMLState.INITIALIZED
        _NVML_OWNER_PID = this_pid
61+
62+
63+
def is_initialized() -> bool:
    """
    Report whether NVML has been initialized by the current process.

    Returns
    -------
    result: bool
        True only when the recorded state is INITIALIZED and the recorded
        owner PID equals the PID of the calling process.
    """
    if _NVML_STATE != _NVMLState.INITIALIZED:
        return False
    # The INITIALIZED state only counts for the PID that recorded it.
    return os.getpid() == _NVML_OWNER_PID
73+
74+
75+
def validate() -> None:
    """
    Check that NVML is usable and that at least one GPU is reported.

    Raises
    ------
    nvml.LibraryNotFoundError
        If the recorded state is DISABLED_LIBRARY_NOT_FOUND.
    nvml.GpuNotFoundError
        If ``nvml.device_get_count_v2()`` returns 0.
    """
    if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
        raise nvml.LibraryNotFoundError("The underlying NVML library was not found")

    if nvml.device_get_count_v2() == 0:
        raise nvml.GpuNotFoundError("No GPUs available")

0 commit comments

Comments
 (0)