Skip to content

Commit d2b7605

Browse files
committed
Add per-interpreter storage for gil_safe_call_once_and_store
1 parent 729654c commit d2b7605

File tree

2 files changed

+126
-4
lines changed

2 files changed

+126
-4
lines changed

include/pybind11/detail/internals.h

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
/// further ABI-incompatible changes may be made before the ABI is officially
4040
/// changed to the new version.
4141
#ifndef PYBIND11_INTERNALS_VERSION
42-
# define PYBIND11_INTERNALS_VERSION 11
42+
# define PYBIND11_INTERNALS_VERSION 12
4343
#endif
4444

4545
#if PYBIND11_INTERNALS_VERSION < 11
@@ -234,6 +234,34 @@ inline uint64_t round_up_to_next_pow2(uint64_t x) {
234234

235235
class loader_life_support;
236236

237+
/// Common polymorphic base for the per-type `call_once_storage<T>` holders.
///
/// It carries no data of its own. Its only job is to give differently-typed
/// holders a shared base type with a virtual destructor, so that one of them
/// can be destroyed through a `call_once_storage_base *`.
struct call_once_storage_base {
    // Instances are neither copyable nor movable.
    call_once_storage_base(const call_once_storage_base &) = delete;
    call_once_storage_base &operator=(const call_once_storage_base &) = delete;
    call_once_storage_base(call_once_storage_base &&) = delete;
    call_once_storage_base &operator=(call_once_storage_base &&) = delete;

    call_once_storage_base() = default;
    // Virtual so that `delete` on a base pointer runs the derived destructor.
    virtual ~call_once_storage_base() = default;
};
245+
246+
template <typename T>
247+
struct call_once_storage : call_once_storage_base {
248+
void (*finalize)(T &) = nullptr;
249+
alignas(T) char storage[sizeof(T)] = {0};
250+
251+
call_once_storage() = default;
252+
~call_once_storage() override {
253+
if (finalize != nullptr) {
254+
finalize(*reinterpret_cast<T *>(storage));
255+
}
256+
memset(storage, 0, sizeof(T));
257+
finalize = nullptr;
258+
};
259+
call_once_storage(const call_once_storage &) = delete;
260+
call_once_storage(call_once_storage &&) = delete;
261+
call_once_storage &operator=(const call_once_storage &) = delete;
262+
call_once_storage &operator=(call_once_storage &&) = delete;
263+
};
264+
237265
/// Internal data structure used to track registered instances and types.
238266
/// Whenever binary incompatible changes are made to this structure,
239267
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
@@ -283,6 +311,8 @@ struct internals {
283311

284312
type_map<PyObject *> native_enum_type_map;
285313

314+
std::unordered_map<const void *, call_once_storage_base *> call_once_storage_map;
315+
286316
internals()
287317
: static_property_type(make_static_property_type()),
288318
default_metaclass(make_default_metaclass()) {
@@ -308,7 +338,12 @@ struct internals {
308338
internals(internals &&other) = delete;
309339
internals &operator=(const internals &other) = delete;
310340
internals &operator=(internals &&other) = delete;
311-
~internals() = default;
341+
~internals() {
    // Free every call-once storage object still referenced by the map; the map
    // holds raw pointers, so nothing else would release them.
    //
    // A plain range-for over the map entries is used instead of a C++17
    // structured binding so that this header keeps compiling in pre-C++17
    // modes (and no unused `_` binding is introduced).
    for (const auto &entry : call_once_storage_map) {
        delete entry.second;
    }
    // Drop the now-dangling pointers.
    call_once_storage_map.clear();
}
312347
};
313348

314349
// the internals struct (above) is shared between all the modules. local_internals are only

include/pybind11/gil_safe_call_once.h

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#pragma once
44

55
#include "detail/common.h"
6+
#include "detail/internals.h"
67
#include "gil.h"
78

89
#include <cassert>
@@ -52,13 +53,15 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
5253
// functions, which is usually the case.
5354
//
5455
// For in-depth background, see docs/advanced/deadlock.md
56+
#ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
5557
template <typename T>
5658
class gil_safe_call_once_and_store {
5759
public:
5860
// PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called.
5961
template <typename Callable>
6062
gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn,
6163
void (*finalize_fn)(T &) = nullptr) {
64+
6265
if (!is_initialized_) { // This read is guarded by the GIL.
6366
// Multiple threads may enter here, because the GIL is released in the next line and
6467
// CPython API calls in the `fn()` call below may release and reacquire the GIL.
@@ -80,10 +83,10 @@ class gil_safe_call_once_and_store {
8083
T &get_stored() {
8184
assert(is_initialized_);
8285
PYBIND11_WARNING_PUSH
83-
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5
86+
# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5
8487
// Needed for gcc 4.8.5
8588
PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing")
86-
#endif
89+
# endif
8790
return *reinterpret_cast<T *>(storage_);
8891
PYBIND11_WARNING_POP
8992
}
@@ -96,6 +99,7 @@ class gil_safe_call_once_and_store {
9699
}
97100

98101
private:
102+
// Global static storage (per process) when subinterpreter support is disabled.
99103
alignas(T) char storage_[sizeof(T)] = {};
100104
std::once_flag once_flag_;
101105
void (*finalize_fn_)(T &) = nullptr;
@@ -105,5 +109,88 @@ class gil_safe_call_once_and_store {
105109
// therefore `std::optional` is not a viable alternative here.
106110
atomic_bool is_initialized_{false};
107111
};
112+
#else
113+
// Subinterpreter support is enabled.
114+
// In this case, we should store the result per-interpreter instead of globally, because
115+
// each subinterpreter has its own separate state. The cached object may not be shareable
116+
// across interpreters (e.g., imported modules and their members).
117+
template <typename T>
118+
class gil_safe_call_once_and_store {
119+
public:
120+
// PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called.
121+
template <typename Callable>
122+
gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn,
123+
void (*finalize_fn)(T &) = nullptr) {
124+
if (!is_initialized_by_atleast_one_interpreter_
125+
|| detail::get_num_interpreters_seen() > 1) {
126+
detail::with_internals([&](detail::internals &internals) {
127+
const void *key = reinterpret_cast<const void *>(this);
128+
auto &storage_map = internals.call_once_storage_map;
129+
auto it = storage_map.find(key);
130+
if (it == storage_map.end()) {
131+
gil_scoped_release gil_rel; // Needed to establish lock ordering.
132+
{
133+
// Only one thread will ever enter here.
134+
gil_scoped_acquire gil_acq;
135+
auto s = new detail::call_once_storage<T>{};
136+
::new (s->storage) T(fn()); // fn may release, but will reacquire, the GIL.
137+
s->finalize = finalize_fn;
138+
last_storage_ = reinterpret_cast<T *>(s->storage);
139+
storage_map.emplace(key, s);
140+
};
141+
}
142+
is_initialized_by_atleast_one_interpreter_ = true;
143+
});
144+
// All threads will observe `is_initialized_by_atleast_one_interp_` as true here.
145+
}
146+
// Intentionally not returning `T &` to ensure the calling code is self-documenting.
147+
return *this;
148+
}
108149

150+
// This must only be called after `call_once_and_store_result()` was called.
151+
T &get_stored() {
152+
T *result = last_storage_;
153+
if (!is_initialized_by_atleast_one_interpreter_
154+
|| detail::get_num_interpreters_seen() > 1) {
155+
detail::with_internals([&](detail::internals &internals) {
156+
const void *key = reinterpret_cast<const void *>(this);
157+
auto &storage_map = internals.call_once_storage_map;
158+
auto it = storage_map.find(key);
159+
assert(it != storage_map.end());
160+
auto *s = static_cast<detail::call_once_storage<T> *>(it->second);
161+
result = last_storage_ = reinterpret_cast<T *>(s->storage);
162+
});
163+
}
164+
assert(result != nullptr);
165+
return *result;
166+
}
167+
168+
constexpr gil_safe_call_once_and_store() = default;
169+
PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() {
170+
if (is_initialized_by_atleast_one_interpreter_) {
171+
detail::with_internals([&](detail::internals &internals) {
172+
const void *key = reinterpret_cast<const void *>(this);
173+
auto &storage_map = internals.call_once_storage_map;
174+
auto it = storage_map.find(key);
175+
if (it != storage_map.end()) {
176+
delete it->second;
177+
storage_map.erase(it);
178+
}
179+
});
180+
}
181+
}
182+
183+
private:
184+
// No storage needed when subinterpreter support is enabled.
185+
// The actual storage is stored in the per-interpreter state dict in
186+
// `internals.call_once_storage_map`.
187+
188+
// Fast local cache to avoid repeated lookups when there are no multiple interpreters.
189+
// This is only valid if there is a single interpreter. Otherwise, it is not used.
190+
T *last_storage_ = nullptr;
191+
// This flag is true if the value has been initialized by any interpreter (may not be the
192+
// current one).
193+
atomic_bool is_initialized_by_atleast_one_interpreter_{false};
194+
};
195+
#endif
109196
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

0 commit comments

Comments
 (0)