Skip to content

Commit aa1cad4

Browse files
authored
Transparent ShutdownManager with cascading ShutdownTrackers (#6040)
* Idea for transparent ShutdownManager use * Tracker hierarchies * Fix wasm shutdown, convenience shutdown method
1 parent 44ac5e1 commit aa1cad4

File tree

13 files changed

+219
-90
lines changed

13 files changed

+219
-90
lines changed
Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
// Copyright 2023 - Nym Technologies SA <contact@nymtech.net>
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use crate::error::ClientCoreError;
54
use crate::{client::replies::reply_storage, config::DebugConfig};
6-
use nym_task::{ShutdownManager, ShutdownToken, ShutdownTracker};
75

86
pub fn setup_empty_reply_surb_backend(debug_config: &DebugConfig) -> reply_storage::Empty {
97
reply_storage::Empty {
@@ -15,49 +13,3 @@ pub fn setup_empty_reply_surb_backend(debug_config: &DebugConfig) -> reply_stora
1513
.maximum_reply_surb_storage_threshold,
1614
}
1715
}
18-
19-
// old 'TaskHandle'
20-
pub(crate) enum ShutdownHelper {
21-
Internal(ShutdownManager),
22-
External(ShutdownTracker),
23-
}
24-
25-
fn new_shutdown_manager() -> Result<ShutdownManager, ClientCoreError> {
26-
cfg_if::cfg_if! {
27-
if #[cfg(not(target_arch = "wasm32"))] {
28-
Ok(ShutdownManager::build_new_default()?)
29-
} else {
30-
Ok(ShutdownManager::new_without_signals())
31-
}
32-
}
33-
}
34-
35-
impl ShutdownHelper {
36-
pub(crate) fn new(shutdown_tracker: Option<ShutdownTracker>) -> Result<Self, ClientCoreError> {
37-
match shutdown_tracker {
38-
None => Ok(ShutdownHelper::Internal(new_shutdown_manager()?)),
39-
Some(shutdown_tracker) => Ok(ShutdownHelper::External(shutdown_tracker)),
40-
}
41-
}
42-
43-
pub(crate) fn into_internal(self) -> Option<ShutdownManager> {
44-
match self {
45-
ShutdownHelper::Internal(manager) => Some(manager),
46-
ShutdownHelper::External(_) => None,
47-
}
48-
}
49-
50-
pub(crate) fn shutdown_token(&self) -> ShutdownToken {
51-
match self {
52-
ShutdownHelper::External(shutdown) => shutdown.clone_shutdown_token(),
53-
ShutdownHelper::Internal(shutdown) => shutdown.clone_shutdown_token(),
54-
}
55-
}
56-
57-
pub(crate) fn tracker(&self) -> &ShutdownTracker {
58-
match self {
59-
ShutdownHelper::External(shutdown) => shutdown,
60-
ShutdownHelper::Internal(shutdown) => shutdown.shutdown_tracker(),
61-
}
62-
}
63-
}

common/client-core/src/client/base_client/mod.rs

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
use super::mix_traffic::ClientRequestSender;
55
use super::received_buffer::ReceivedBufferMessage;
66
use super::statistics_control::StatisticsControl;
7-
use crate::client::base_client::helpers::ShutdownHelper;
87
use crate::client::base_client::storage::helpers::store_client_keys;
98
use crate::client::base_client::storage::MixnetClientStorage;
109
use crate::client::cover_traffic_stream::LoopCoverTrafficStream;
@@ -53,7 +52,7 @@ use nym_sphinx::receiver::{ReconstructedMessage, SphinxMessageReceiver};
5352
use nym_statistics_common::clients::ClientStatsSender;
5453
use nym_statistics_common::generate_client_stats_id;
5554
use nym_task::connections::{ConnectionCommandReceiver, ConnectionCommandSender, LaneQueueLengths};
56-
use nym_task::{ShutdownManager, ShutdownTracker};
55+
use nym_task::ShutdownTracker;
5756
use nym_topology::provider_trait::TopologyProvider;
5857
use nym_topology::HardcodedTopologyProvider;
5958
use nym_validator_client::nym_api::NymApiClientExt;
@@ -881,8 +880,12 @@ where
881880
let shared_topology_accessor =
882881
TopologyAccessor::new(self.config.debug.topology.ignore_egress_epoch_role);
883882

884-
// Shutdown notifier for signalling tasks to stop
885-
let shutdown = ShutdownHelper::new(self.shutdown)?;
883+
// Create a shutdown tracker for this client - either as a child of provided tracker
884+
// or get one from the registry
885+
let shutdown_tracker = match self.shutdown {
886+
Some(parent_tracker) => parent_tracker.child_tracker(),
887+
None => nym_task::get_sdk_shutdown_tracker()?,
888+
};
886889

887890
// channels responsible for dealing with reply-related fun
888891
let (reply_controller_sender, reply_controller_receiver) =
@@ -914,7 +917,7 @@ where
914917
self.user_agent.clone(),
915918
generate_client_stats_id(*self_address.identity()),
916919
input_sender.clone(),
917-
shutdown.tracker(),
920+
&shutdown_tracker.child_tracker(),
918921
);
919922

920923
// needs to be started as the first thing to block if required waiting for the gateway
@@ -924,14 +927,14 @@ where
924927
shared_topology_accessor.clone(),
925928
self_address.gateway(),
926929
self.wait_for_gateway,
927-
shutdown.tracker(),
930+
&shutdown_tracker.child_tracker(),
928931
)
929932
.await?;
930933

931934
let gateway_packet_router = PacketRouter::new(
932935
ack_sender,
933936
mixnet_messages_sender,
934-
shutdown.shutdown_token(),
937+
shutdown_tracker.clone_shutdown_token(),
935938
);
936939

937940
let gateway_transceiver = Self::setup_gateway_transceiver(
@@ -944,15 +947,15 @@ where
944947
stats_reporter.clone(),
945948
#[cfg(unix)]
946949
self.connection_fd_callback,
947-
shutdown.tracker(),
950+
&shutdown_tracker.child_tracker(),
948951
)
949952
.await?;
950953
let gateway_ws_fd = gateway_transceiver.ws_fd();
951954

952955
let reply_storage = Self::setup_persistent_reply_storage(
953956
reply_storage_backend,
954957
key_rotation_config,
955-
shutdown.tracker(),
958+
&shutdown_tracker.child_tracker(),
956959
)
957960
.await?;
958961

@@ -963,16 +966,18 @@ where
963966
reply_storage.key_storage(),
964967
reply_controller_sender.clone(),
965968
stats_reporter.clone(),
966-
shutdown.tracker(),
969+
&shutdown_tracker.child_tracker(),
967970
);
968971

969972
// The message_sender is the transmitter for any component generating sphinx packets
970973
// that are to be sent to the mixnet. They are used by cover traffic stream and real
971974
// traffic stream.
972975
// The MixTrafficController then sends the actual traffic
973976

974-
let (message_sender, client_request_sender) =
975-
Self::start_mix_traffic_controller(gateway_transceiver, shutdown.tracker());
977+
let (message_sender, client_request_sender) = Self::start_mix_traffic_controller(
978+
gateway_transceiver,
979+
&shutdown_tracker.child_tracker(),
980+
);
976981

977982
// Channels that the websocket listener can use to signal downstream to the real traffic
978983
// controller that connections are closed.
@@ -1001,7 +1006,7 @@ where
10011006
shared_lane_queue_lengths.clone(),
10021007
client_connection_rx,
10031008
stats_reporter.clone(),
1004-
shutdown.tracker(),
1009+
&shutdown_tracker.child_tracker(),
10051010
);
10061011

10071012
if !self
@@ -1017,7 +1022,7 @@ where
10171022
shared_topology_accessor.clone(),
10181023
message_sender,
10191024
stats_reporter.clone(),
1020-
shutdown.tracker(),
1025+
&shutdown_tracker.child_tracker(),
10211026
);
10221027
}
10231028

@@ -1045,7 +1050,7 @@ where
10451050
gateway_connection: GatewayConnection { gateway_ws_fd },
10461051
},
10471052
stats_reporter,
1048-
shutdown_handle: shutdown.into_internal(),
1053+
shutdown_handle: Some(shutdown_tracker), // The primary tracker for this client
10491054
client_request_sender,
10501055
forget_me: self.config.debug.forget_me,
10511056
remember_me: self.config.debug.remember_me,
@@ -1061,7 +1066,7 @@ pub struct BaseClient {
10611066
pub client_state: ClientState,
10621067
pub stats_reporter: ClientStatsSender,
10631068
pub client_request_sender: ClientRequestSender,
1064-
pub shutdown_handle: Option<ShutdownManager>,
1069+
pub shutdown_handle: Option<ShutdownTracker>,
10651070
pub forget_me: ForgetMe,
10661071
pub remember_me: RememberMe,
10671072
}

common/client-core/src/error.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
use crate::client::mix_traffic::transceiver::ErasedGatewayError;
55
use nym_crypto::asymmetric::ed25519::Ed25519RecoveryError;
66
use nym_gateway_client::error::GatewayClientError;
7+
use nym_task::RegistryAccessError;
78
use nym_topology::node::RoutingNodeError;
89
use nym_topology::{NodeId, NymTopologyError};
910
use nym_validator_client::nym_api::error::NymAPIError;
@@ -242,6 +243,9 @@ pub enum ClientCoreError {
242243

243244
#[error("failed to select valid gateway due to incomputable latency")]
244245
GatewaySelectionFailure { source: WeightedError },
246+
247+
#[error("Could not access task registry, {0}")]
248+
RegistryAccess(#[from] RegistryAccessError),
245249
}
246250

247251
impl From<tungstenite::Error> for ClientCoreError {

common/task/src/cancellation/token.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ pub struct ShutdownToken {
1717
inner: CancellationToken,
1818
}
1919

20+
impl From<CancellationToken> for ShutdownToken {
21+
fn from(inner: CancellationToken) -> Self {
22+
ShutdownToken { inner }
23+
}
24+
}
25+
2026
impl ShutdownToken {
2127
/// A drop in no-op replacement for `send_status_msg` for easier migration from [TaskClient](crate::TaskClient).
2228
#[deprecated]
@@ -45,6 +51,13 @@ impl ShutdownToken {
4551
&self.inner
4652
}
4753

54+
/// Get an owned [CancellationToken](tokio_util::sync::CancellationToken) for public API use.
55+
/// This is useful when you need to expose cancellation to SDK users without
56+
/// exposing the internal ShutdownToken type.
57+
pub fn to_cancellation_token(&self) -> CancellationToken {
58+
self.inner.clone()
59+
}
60+
4861
/// Creates a `ShutdownToken` which will get cancelled whenever the
4962
/// current token gets cancelled. Unlike a cloned `ShutdownToken`,
5063
/// cancelling a child token does not cancel the parent token.

common/task/src/cancellation/tracker.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,4 +314,40 @@ impl ShutdownTracker {
314314
pub fn clone_shutdown_token(&self) -> ShutdownToken {
315315
self.root_cancellation_token.clone()
316316
}
317+
318+
/// Create a child ShutdownTracker that inherits cancellation from this tracker
319+
/// but has its own TaskTracker for managing sub-tasks.
320+
///
321+
/// This enables hierarchical task management where:
322+
/// - Parent cancellation flows to all children
323+
/// - Each level tracks its own tasks independently
324+
/// - Components can wait for their specific sub-tasks to complete
325+
pub fn child_tracker(&self) -> ShutdownTracker {
326+
// Child token inherits cancellation from parent
327+
let child_token = self.root_cancellation_token.child_token();
328+
329+
// New TaskTracker for this level's tasks
330+
let child_task_tracker = TaskTracker::new();
331+
332+
ShutdownTracker {
333+
root_cancellation_token: child_token,
334+
tracker: child_task_tracker,
335+
}
336+
}
337+
338+
/// Convenience method to perform a complete shutdown sequence.
339+
/// This method:
340+
/// 1. Signals cancellation to all tasks
341+
/// 2. Closes the tracker to prevent new tasks
342+
/// 3. Waits for all existing tasks to complete
343+
pub async fn shutdown(self) {
344+
// Signal cancellation to all tasks
345+
self.root_cancellation_token.cancel();
346+
347+
// Close the tracker to prevent new tasks from being spawned
348+
self.tracker.close();
349+
350+
// Wait for all existing tasks to complete
351+
self.tracker.wait().await;
352+
}
317353
}

common/task/src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ pub mod cancellation;
55
pub mod connections;
66
pub mod event;
77
pub mod manager;
8+
pub(crate) mod runtime_registry;
89
#[cfg(not(target_arch = "wasm32"))]
910
pub mod signal;
1011
pub mod spawn;
@@ -18,3 +19,11 @@ pub use tokio_util::task::TaskTracker;
1819

1920
#[cfg(not(target_arch = "wasm32"))]
2021
pub use signal::{wait_for_signal, wait_for_signal_and_error};
22+
23+
pub use crate::runtime_registry::RegistryAccessError;
24+
25+
/// Get or create a ShutdownTracker for SDK use.
26+
/// This provides automatic task management without requiring manual setup.
27+
pub fn get_sdk_shutdown_tracker() -> Result<ShutdownTracker, RegistryAccessError> {
28+
Ok(runtime_registry::RuntimeRegistry::get_or_create_sdk()?.shutdown_tracker_owned())
29+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// Copyright 2025 - Nym Technologies SA <contact@nymtech.net>
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use thiserror::Error;
5+
6+
use crate::ShutdownManager;
7+
use std::sync::RwLock;
8+
use std::sync::{Arc, LazyLock};
9+
10+
/// Global registry that manages ShutdownManagers transparently.
11+
/// This allows SDK components to get automatic task management without
12+
/// exposing the complexity to end users.
13+
pub(crate) struct RuntimeRegistry {
14+
// For SDK clients: auto-created manager without signal handling
15+
sdk_manager: RwLock<Option<Arc<ShutdownManager>>>,
16+
}
17+
18+
#[derive(Debug, Error)]
19+
pub enum RegistryAccessError {
20+
#[error("the runtime registry is poisoned")]
21+
Poisoned,
22+
}
23+
24+
impl RuntimeRegistry {
25+
/// Get or create a ShutdownManager for SDK use.
26+
/// This manager doesn't listen to OS signals, making it suitable for library use.
27+
pub(crate) fn get_or_create_sdk() -> Result<Arc<ShutdownManager>, RegistryAccessError> {
28+
let guard = REGISTRY
29+
.sdk_manager
30+
.read()
31+
.map_err(|_| RegistryAccessError::Poisoned)?;
32+
if let Some(manager) = guard.as_ref() {
33+
return Ok(manager.clone());
34+
}
35+
drop(guard);
36+
37+
let mut guard = REGISTRY
38+
.sdk_manager
39+
.write()
40+
.map_err(|_| RegistryAccessError::Poisoned)?;
41+
Ok(guard
42+
.get_or_insert_with(|| Arc::new(ShutdownManager::new_without_signals()))
43+
.clone())
44+
}
45+
46+
/// Check if an SDK manager has been created.
47+
/// Useful for testing and debugging.
48+
#[allow(dead_code)]
49+
pub(crate) fn has_sdk_manager() -> Result<bool, RegistryAccessError> {
50+
Ok(REGISTRY
51+
.sdk_manager
52+
.read()
53+
.map_err(|_| RegistryAccessError::Poisoned)?
54+
.is_some())
55+
}
56+
57+
/// Clear the SDK manager.
58+
/// This is primarily for testing to ensure isolation between tests.
59+
#[cfg(test)]
60+
pub(crate) async fn clear() -> Result<(), RegistryAccessError> {
61+
*REGISTRY
62+
.sdk_manager
63+
.write()
64+
.map_err(|_| RegistryAccessError::Poisoned)? = None;
65+
Ok(())
66+
}
67+
}
68+
69+
/// Global instance of the runtime registry.
70+
/// Uses LazyLock for on-demand initialization.
71+
static REGISTRY: LazyLock<RuntimeRegistry> = LazyLock::new(|| RuntimeRegistry {
72+
sdk_manager: RwLock::new(None),
73+
});
74+
75+
#[cfg(test)]
76+
mod tests {
77+
use super::*;
78+
79+
#[tokio::test]
80+
async fn test_get_or_create_sdk() {
81+
// Clear any existing manager
82+
let _ = RuntimeRegistry::clear().await;
83+
84+
assert!(!RuntimeRegistry::has_sdk_manager().unwrap());
85+
86+
let manager1 = RuntimeRegistry::get_or_create_sdk().unwrap();
87+
assert!(RuntimeRegistry::has_sdk_manager().unwrap());
88+
89+
let manager2 = RuntimeRegistry::get_or_create_sdk().unwrap();
90+
// Should return the same instance
91+
assert!(Arc::ptr_eq(&manager1, &manager2));
92+
93+
let _ = RuntimeRegistry::clear().await;
94+
assert!(!RuntimeRegistry::has_sdk_manager().unwrap());
95+
}
96+
}

0 commit comments

Comments
 (0)