From f30f1948b5eff52a6b84c79c3126743a89ca7735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Przytu=C5=82a?= Date: Thu, 5 Jun 2025 08:13:09 +0200 Subject: [PATCH 1/6] default LBP: extract method with sanity checks `DefaultPolicy::pick()` begins with some sanity checks, whose goal is to warn the user about misconfiguration. Those checks are extracted into a separate method, `DefaultPolicy::pick_sanity_checks()`, so that `pick()` is decluttered. The next commits introduce more sanity checks. --- scylla/src/policies/load_balancing/default.rs | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/scylla/src/policies/load_balancing/default.rs b/scylla/src/policies/load_balancing/default.rs index 9f47bed244..3f613e5ea0 100644 --- a/scylla/src/policies/load_balancing/default.rs +++ b/scylla/src/policies/load_balancing/default.rs @@ -167,22 +167,8 @@ impl LoadBalancingPolicy for DefaultPolicy { * for the statement, so that we can pick one of them. */ let routing_info = self.routing_info(query, cluster); - if let Some(ref token_with_strategy) = routing_info.token_with_strategy { - if self.preferences.datacenter().is_some() - && !self.permit_dc_failover - && matches!( - token_with_strategy.strategy, - Strategy::SimpleStrategy { .. } - ) - { - warn!("\ -Combining SimpleStrategy with preferred_datacenter set to Some and disabled datacenter failover may lead to empty query plans for some tokens.\ -It is better to give up using one of them: either operate in a keyspace with NetworkTopologyStrategy, which explicitly states\ -how many replicas there are in each datacenter (you probably want at least 1 to avoid empty plans while preferring that datacenter), \ -or refrain from preferring datacenters (which may ban all other datacenters, if datacenter failover happens to be not possible)." - ); - } - } + /* Check for misconfiguration and warn if any is discovered. */ + self.pick_sanity_checks(&routing_info, cluster); /* LWT statements need to be routed differently: always to the same replica, to avoid Paxos contention. */ let statement_type = if query.is_confirmed_lwt { @@ -602,6 +588,26 @@ impl DefaultPolicy { routing_info } + /// Checks for misconfiguration and warns if any is discovered. + fn pick_sanity_checks(&self, routing_info: &ProcessedRoutingInfo, _cluster: &ClusterState) { + if let Some(ref token_with_strategy) = routing_info.token_with_strategy { + if self.preferences.datacenter().is_some() + && !self.permit_dc_failover + && matches!( + token_with_strategy.strategy, + Strategy::SimpleStrategy { .. } + ) + { + warn!("\ +Combining SimpleStrategy with preferred_datacenter set to Some and disabled datacenter failover may lead to empty query plans for some tokens.\ +It is better to give up using one of them: either operate in a keyspace with NetworkTopologyStrategy, which explicitly states\ +how many replicas there are in each datacenter (you probably want at least 1 to avoid empty plans while preferring that datacenter), \ +or refrain from preferring datacenters (which may ban all other datacenters, if datacenter failover happens to be not possible)." + ); + } + } + } + /// Returns all nodes in the local datacenter if one is given, /// or else all nodes in the cluster. 
fn preferred_node_set<'a>(&'a self, cluster: &'a ClusterState) -> &'a [Arc] { From c6edc7477bcccea99b5078f1d72d0e9bb29c3b4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Przytu=C5=82a?= Date: Thu, 5 Jun 2025 08:18:31 +0200 Subject: [PATCH 2/6] default LBP: pick_sanity_checks() slight refactor This makes the next commit cleaner. --- scylla/src/policies/load_balancing/default.rs | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scylla/src/policies/load_balancing/default.rs b/scylla/src/policies/load_balancing/default.rs index 3f613e5ea0..7160b1bee0 100644 --- a/scylla/src/policies/load_balancing/default.rs +++ b/scylla/src/policies/load_balancing/default.rs @@ -590,20 +590,21 @@ impl DefaultPolicy { /// Checks for misconfiguration and warns if any is discovered. fn pick_sanity_checks(&self, routing_info: &ProcessedRoutingInfo, _cluster: &ClusterState) { - if let Some(ref token_with_strategy) = routing_info.token_with_strategy { - if self.preferences.datacenter().is_some() - && !self.permit_dc_failover - && matches!( - token_with_strategy.strategy, - Strategy::SimpleStrategy { .. } - ) - { - warn!("\ + if let Some(_preferred_dc) = self.preferences.datacenter() { + if let Some(ref token_with_strategy) = routing_info.token_with_strategy { + if !self.permit_dc_failover + && matches!( + token_with_strategy.strategy, + Strategy::SimpleStrategy { .. } + ) + { + warn!("\ Combining SimpleStrategy with preferred_datacenter set to Some and disabled datacenter failover may lead to empty query plans for some tokens.\ It is better to give up using one of them: either operate in a keyspace with NetworkTopologyStrategy, which explicitly states\ how many replicas there are in each datacenter (you probably want at least 1 to avoid empty plans while preferring that datacenter), \ or refrain from preferring datacenters (which may ban all other datacenters, if datacenter failover happens to be not possible)." - ); + ); + } } } } From e2ba54b8ad95f851d28c9f56071443a737664a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Przytu=C5=82a?= Date: Thu, 5 Jun 2025 08:21:37 +0200 Subject: [PATCH 3/6] default LBP: warn on preferred DC misconfiguration DefaultPolicy now issues respective warnings: 1. if the preferred datacenter is not present in the cluster; 2. if all nodes in the preferred datacenter are disabled by the HostFilter. This helps avoid confusion when the user expects requests to be sent to a specific datacenter, but it is not available. As warnings are going to be emitted on every request (because the configuration is most likely session-wide), the messages will quickly flood the logs and the user should notice. Alternatively, we could implement some frequency limiting of those warnings, but we rather prefer to: - keep it simple for now, - let the user notice the misconfiguration as quickly as possible. --- scylla/src/policies/load_balancing/default.rs | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/scylla/src/policies/load_balancing/default.rs b/scylla/src/policies/load_balancing/default.rs index 7160b1bee0..6e037887ea 100644 --- a/scylla/src/policies/load_balancing/default.rs +++ b/scylla/src/policies/load_balancing/default.rs @@ -589,8 +589,9 @@ impl DefaultPolicy { } /// Checks for misconfiguration and warns if any is discovered. 
- fn pick_sanity_checks(&self, routing_info: &ProcessedRoutingInfo, _cluster: &ClusterState) { - if let Some(_preferred_dc) = self.preferences.datacenter() { + fn pick_sanity_checks(&self, routing_info: &ProcessedRoutingInfo, cluster: &ClusterState) { + if let Some(preferred_dc) = self.preferences.datacenter() { + // Preferred DC + no datacenter failover + SimpleStrategy is an anti-pattern. if let Some(ref token_with_strategy) = routing_info.token_with_strategy { if !self.permit_dc_failover && matches!( @@ -606,6 +607,56 @@ or refrain from preferring datacenters (which may ban all other datacenters, if ); } } + + // Verify that the preferred datacenter is actually present in the cluster. + // If not, shout a warning. + if cluster + .replica_locator() + .unique_nodes_in_datacenter_ring(preferred_dc) + .unwrap_or(&[]) + .is_empty() + { + if self.permit_dc_failover { + warn!( + "\ +The preferred datacenter (\"{preferred_dc}\") is not present in the cluster! \ +Datacenter failover is enabled, so the request will be always sent to remote DCs. \ +This is most likely not what you want!" + ); + } else { + warn!( + "\ +The preferred datacenter (\"{preferred_dc}\") is not present in the cluster! \ +Datacenter failover is disabled, so the query plans will be empty! \ +You won't be able to execute any requests!" + ); + } + } + // Verify that there exist any enabled nodes in the preferred datacenter. + // If not, shout a warning. + else if !cluster + .replica_locator() + .unique_nodes_in_datacenter_ring(preferred_dc) + .unwrap_or(&[]) + .iter() + .any(|node| node.is_enabled()) + { + if self.permit_dc_failover { + warn!( + "\ +All nodes in the preferred datacenter (\"{preferred_dc}\") are disabled by the HostFilter! \ +Datacenter failover is enabled, so the request will be always sent to remote DCs. \ +This is most likely a misconfiguration!" + ); + } else { + warn!( + "\ +All nodes in the preferred datacenter (\"{preferred_dc}\") are disabled by the HostFilter! \ +Datacenter failover is disabled, so the query plans will be empty! \ +You won't be able to execute any requests! This is most likely a misconfiguration!" + ); + } + } } } From c8f9a2ca3abf958be533f6b82db41e2b12501c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Przytu=C5=82a?= Date: Sun, 8 Jun 2025 19:46:48 +0200 Subject: [PATCH 4/6] observability: Introduce RateLimiter & macros RateLimiter is a new utility that allows rate-limiting arbitrary actions. It provides one central method, `try_acquire`, which checks if the action should be performed based on a specified interval. If the action has not been performed: 1. ever, or 2. in the last `interval`, it updates the last-performed timestamp and returns `true`, allowing the action to proceed. Otherwise, it returns `false`, indicating the action should be skipped due to rate limiting. This utility is designed to be efficient and thread-safe, using atomic operations to ensure that multiple threads can safely check and update the last-performed timestamp without locking. The `RateLimiter` is intended for use in scenarios where actions need to be performed at a controlled rate, such as logging, warning messages, or other operations that should not be executed too frequently to avoid spamming logs or overwhelming resources. For now, it's used to rate-limit warning messages emitted by the driver, in the implementation of `warn_rate_limited!` macro. NOTE to reviewers: this code has been generated with major help of the Claude Sonnet 4 AI model. Beware of common pitfalls of AI-generated code. 
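A minimal usage sketch for reviewers, based only on the API introduced in this commit
(`RateLimiter::new()`, `try_acquire()`, and the `warn_rate_limited!` macro); the call
site and the warning message are made up for illustration:

```rust
use std::time::Duration;

fn on_request() {
    // Each call site keeps its own static limiter, which is what the
    // `rate_limited!` / `warn_rate_limited!` macros expand to under the hood.
    static LIMITER: RateLimiter = RateLimiter::new();

    // The first call always succeeds; later calls within the same second are skipped.
    if LIMITER.try_acquire(Duration::from_secs(1)) {
        tracing::warn!("possible misconfiguration detected");
    }

    // Equivalent one-liner using the new macro:
    // warn_rate_limited!(Duration::from_secs(1), "possible misconfiguration detected");
}
```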
--- scylla/src/observability/driver_tracing.rs | 6 +- scylla/src/observability/mod.rs | 3 + scylla/src/observability/rate_limiting.rs | 279 +++++++++++++++++++++ 3 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 scylla/src/observability/rate_limiting.rs diff --git a/scylla/src/observability/driver_tracing.rs b/scylla/src/observability/driver_tracing.rs index 47b9c3a04d..9e0a549673 100644 --- a/scylla/src/observability/driver_tracing.rs +++ b/scylla/src/observability/driver_tracing.rs @@ -3,13 +3,11 @@ use crate::network::Connection; use crate::response::query_result::QueryResult; use crate::routing::{Shard, Token}; use itertools::{Either, Itertools}; -use scylla_cql::frame::response::result::ColumnSpec; -use scylla_cql::frame::response::result::RawMetadataAndRawRows; +use scylla_cql::frame::response::result::{ColumnSpec, RawMetadataAndRawRows}; use scylla_cql::value::deser_cql_value; use std::borrow::Borrow; use std::fmt::Display; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; +use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use tracing::trace_span; diff --git a/scylla/src/observability/mod.rs b/scylla/src/observability/mod.rs index f5b16a4c70..4edaba9e9d 100644 --- a/scylla/src/observability/mod.rs +++ b/scylla/src/observability/mod.rs @@ -9,4 +9,7 @@ pub(crate) mod driver_tracing; pub mod history; #[cfg(feature = "metrics")] pub mod metrics; +mod rate_limiting; pub mod tracing; + +pub(crate) use rate_limiting::{rate_limited, warn_rate_limited, RateLimiter}; diff --git a/scylla/src/observability/rate_limiting.rs b/scylla/src/observability/rate_limiting.rs new file mode 100644 index 0000000000..a9dc6c8720 --- /dev/null +++ b/scylla/src/observability/rate_limiting.rs @@ -0,0 +1,279 @@ +//! Utilities for rate limiting actions in a lock-free manner. + +use std::{ + sync::{ + atomic::{AtomicU64, Ordering}, + OnceLock, + }, + time::{Duration, Instant}, +}; + +/// `RateLimiter` can be used to control the rate of permit acquisition. +/// It uses atomic operations to ensure that permits are acquired at most once per specified interval. +/// +/// # Example +/// +/// Ignored because `RateLimiter` is not public API. +/// ```ignore +/// let rate_limiter = RateLimiter::new(); +/// // Try to acquire a permit with a 1 second interval. +/// if rate_limiter.try_acquire(Duration::from_secs(1)) { +/// println!("Permit acquired!"); +/// } else { +/// println!("Rate limit exceeded, try again later."); +/// } +/// ``` +pub(crate) struct RateLimiter { + last_permit_nanos: AtomicU64, +} + +impl RateLimiter { + /// Creates a new `RateLimiter` instance, which is used to control the rate of permit acquisition. + pub(crate) const fn new() -> Self { + Self { + last_permit_nanos: AtomicU64::new(0), + } + } + + /// Attempts to acquire a permit, which is subject of rate limiting, + /// given the specified permit interval. + /// + /// If enough time has passed since the last permit was acquired, + /// updates the last permit time and returns `true`. Otherwise, + /// the rate is limited and `false` is returned. + /// + /// The first call to `try_acquire` will always succeed, allowing the first permit to be acquired immediately. + pub(crate) fn try_acquire(&self, interval: Duration) -> bool { + // Single global reference point for all rate limiters. + // Used as a necessary absolute reference point for comparing [std::time::Instant]. 
+ static GLOBAL_EPOCH: OnceLock = OnceLock::new(); + + let now = Instant::now(); + let epoch = *GLOBAL_EPOCH.get_or_init(|| now); + + let now_nanos = now.duration_since(epoch).as_nanos() as u64; + let interval_nanos = interval.as_nanos() as u64; + + let last_permit = self.last_permit_nanos.load(Ordering::Relaxed); + + // Special case: if `last_permit` is 0, this is the first call, so allow it. + if last_permit == 0 { + // Try to update from 0 to current time. + return self + .last_permit_nanos + .compare_exchange( + 0, + now_nanos.max(1), // Ensure we never store 0 again. + Ordering::Relaxed, + Ordering::Relaxed, + ) + .is_ok(); + } + + // Normal case: check if enough time has passed. + if now_nanos.saturating_sub(last_permit) >= interval_nanos { + self.last_permit_nanos + .compare_exchange(last_permit, now_nanos, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + } else { + false + } + } +} + +/// A macro to perform an action if the rate limit allows it. +/// +/// This macro ensures that the given action is performed at most once per +/// specified interval, using efficient atomic operations for synchronization. +/// Each unique call site has its own independent rate limiting state. +/// +/// # Arguments +/// +/// * `$interval` - A `Duration` specifying the minimum time between actions. +/// * `$action` - A closure (of type `FnOnce()`) specifying an action executed +/// if the rate limit allows it. +/// +/// # Example +/// +/// Ignored because `rate_limited!` is not public API. +/// ```ignore +/// use std::time::Duration; +/// +/// // This will only print a message once per second, no matter how many times it's called. +/// rate_limited!(Duration::from_secs(1), || println!("This is a rate-limited print")); +/// ``` +/// +/// # Implementation Details +/// +/// - Uses `std::time::Instant` for monotonic time measurements immune to clock adjustments +/// - Uses `AtomicU64` for lock-free synchronization between threads +/// - Each macro call site gets its own static rate limiting state for independence +/// - Uses relaxed memory ordering for optimal performance +/// - Minimal macro expansion - just static definition + function call +/// +/// # Thread Safety +/// +/// This macro is fully thread-safe. Multiple threads can call the same rate-limited action +/// concurrently, and at most one thread will perform the action per interval. +macro_rules! rate_limited { + ($interval:expr, $action:expr) => {{ + use $crate::observability::RateLimiter; + + // Each call site gets its own static rate limiting state. + static RATE_LIMIT_STATE: RateLimiter = RateLimiter::new(); + + // Check if we should warn and emit if so + if RATE_LIMIT_STATE.try_acquire($interval) { + $action(); + } + }}; +} + +/// A rate-limited version of the `warn!()` macro that prevents spamming logs. +/// +/// This macro ensures that warning messages are only emitted at most once per +/// specified interval, using efficient atomic operations for synchronization. +/// Each unique call site has its own independent rate limiting state. +/// +/// # Arguments +/// +/// * `$interval` - A `Duration` specifying the minimum time between warnings +/// * `$args` - Arguments passed to the `warn!` macro (format string and values) +/// +/// # Examples +/// +/// Ignored because `warn_rate_limited!` is not public API. 
+/// ```ignore +/// use std::time::Duration; +/// +/// // This will only warn once per second, no matter how many times it's called +/// warn_rate_limited!(Duration::from_secs(1), "This is a rate-limited warning"); +/// +/// // With format arguments +/// warn_rate_limited!( +/// Duration::from_secs(5), +/// "Connection failed to {}: {}", +/// address, +/// error +/// ); +/// ``` +/// +/// # Implementation Details +/// +/// - Uses `std::time::Instant` for monotonic time measurements immune to clock adjustments +/// - Uses `AtomicU64` for lock-free synchronization between threads +/// - Each macro call site gets its own static rate limiting state for independence +/// - Uses relaxed memory ordering for optimal performance +/// - Minimal macro expansion - just static definition + function call +/// +/// # Thread Safety +/// +/// This macro is fully thread-safe. Multiple threads can call the same rate-limited warning +/// concurrently, and at most one thread will emit the warning per interval. +macro_rules! warn_rate_limited { + ($interval:expr $(, $args:expr)* $(,)?) => { + $crate::observability::rate_limited!($interval, || { + // Use the tracing crate to log the warning. + tracing::warn!($($args),*); + }); + }; +} + +pub(crate) use rate_limited; +pub(crate) use warn_rate_limited; + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + use std::thread; + use std::time::Duration; + + #[test] + fn test_rate_limit_state_basic() { + let state = RateLimiter::new(); + + // First call should return true + assert!(state.try_acquire(Duration::from_millis(100))); + + // Immediate second call should return false + assert!(!state.try_acquire(Duration::from_millis(100))); + + // After waiting, should return true again + std::thread::sleep(Duration::from_millis(150)); + assert!(state.try_acquire(Duration::from_millis(100))); + } + + #[test] + fn test_rate_limit_state_concurrent() { + let state = Arc::new(RateLimiter::new()); + let success_count = Arc::new(AtomicUsize::new(0)); + + let threads: Vec<_> = (0..10) + .map(|_| { + let state_clone = state.clone(); + let success_count_clone = success_count.clone(); + thread::spawn(move || { + for _ in 0..100 { + if state_clone.try_acquire(Duration::from_millis(10)) { + success_count_clone.fetch_add(1, Ordering::Relaxed); + } + } + }) + }) + .collect(); + + for t in threads { + t.join().unwrap(); + } + + // Should have significantly fewer successes than total attempts due to rate limiting + let successes = success_count.load(Ordering::Relaxed); + assert!( + successes > 0, + "Should have at least some successes (at least the first attempt)" + ); + assert!( + successes < 1000, + "Should be rate limited, got {} successes", + successes + ); + } + + #[test] + fn test_different_states_independent() { + let state1 = RateLimiter::new(); + let state2 = RateLimiter::new(); + + // Both should be able to acquire initially. + assert!(state1.try_acquire(Duration::from_millis(100))); + assert!(state2.try_acquire(Duration::from_millis(100))); + + // Both should be rate limited independently. 
+ assert!(!state1.try_acquire(Duration::from_millis(100))); + assert!(!state2.try_acquire(Duration::from_millis(100))); + } + + #[test] + fn test_zero_interval() { + let state = RateLimiter::new(); + + // With zero interval, every call should succeed + assert!(state.try_acquire(Duration::ZERO)); + assert!(state.try_acquire(Duration::ZERO)); + assert!(state.try_acquire(Duration::ZERO)); + } + + #[test] + fn test_very_long_interval() { + let state = RateLimiter::new(); + + // First call succeeds + assert!(state.try_acquire(Duration::from_secs(3600))); // 1 hour + + // Subsequent calls should fail + assert!(!state.try_acquire(Duration::from_secs(3600))); + assert!(!state.try_acquire(Duration::from_secs(3600))); + } +} From a3923e406448c7ef41b696c27ca52a628b1d2445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Przytu=C5=82a?= Date: Sun, 8 Jun 2025 20:01:51 +0200 Subject: [PATCH 5/6] default LBP: rate limit sanity checks warnings In case of misconfiguration, the default load balancing policy can throw a lot of warnings, which flood the logs and overwhelm the machine. To prevent this, we rate limit the warnings there to one per second. NOTE: each warning is rate limited separately, so if there are multiple misconfigurations, each will still be logged. --- scylla/src/policies/load_balancing/default.rs | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/scylla/src/policies/load_balancing/default.rs b/scylla/src/policies/load_balancing/default.rs index 6e037887ea..72626c9bfd 100644 --- a/scylla/src/policies/load_balancing/default.rs +++ b/scylla/src/policies/load_balancing/default.rs @@ -3,6 +3,7 @@ pub use self::latency_awareness::LatencyAwarenessBuilder; use super::{FallbackPlan, LoadBalancingPolicy, NodeRef, RoutingInfo}; use crate::cluster::ClusterState; +use crate::observability::warn_rate_limited; use crate::{ cluster::metadata::Strategy, cluster::node::Node, @@ -16,7 +17,7 @@ use rand_pcg::Pcg32; use scylla_cql::frame::response::result::TableSpec; use std::hash::{Hash, Hasher}; use std::{fmt, sync::Arc, time::Duration}; -use tracing::{debug, warn}; +use tracing::debug; use uuid::Uuid; #[derive(Clone, Copy)] @@ -590,6 +591,8 @@ impl DefaultPolicy { /// Checks for misconfiguration and warns if any is discovered. fn pick_sanity_checks(&self, routing_info: &ProcessedRoutingInfo, cluster: &ClusterState) { + const SANITY_CHECKS_WARNING_INTERVAL: std::time::Duration = + std::time::Duration::from_secs(1); if let Some(preferred_dc) = self.preferences.datacenter() { // Preferred DC + no datacenter failover + SimpleStrategy is an anti-pattern. if let Some(ref token_with_strategy) = routing_info.token_with_strategy { @@ -599,7 +602,7 @@ impl DefaultPolicy { Strategy::SimpleStrategy { .. } ) { - warn!("\ + warn_rate_limited!(SANITY_CHECKS_WARNING_INTERVAL, "\ Combining SimpleStrategy with preferred_datacenter set to Some and disabled datacenter failover may lead to empty query plans for some tokens.\ It is better to give up using one of them: either operate in a keyspace with NetworkTopologyStrategy, which explicitly states\ how many replicas there are in each datacenter (you probably want at least 1 to avoid empty plans while preferring that datacenter), \ @@ -617,14 +620,16 @@ or refrain from preferring datacenters (which may ban all other datacenters, if .is_empty() { if self.permit_dc_failover { - warn!( + warn_rate_limited!( + SANITY_CHECKS_WARNING_INTERVAL, "\ The preferred datacenter (\"{preferred_dc}\") is not present in the cluster! 
\ Datacenter failover is enabled, so the request will be always sent to remote DCs. \ This is most likely not what you want!" ); } else { - warn!( + warn_rate_limited!( + SANITY_CHECKS_WARNING_INTERVAL, "\ The preferred datacenter (\"{preferred_dc}\") is not present in the cluster! \ Datacenter failover is disabled, so the query plans will be empty! \ @@ -642,14 +647,16 @@ You won't be able to execute any requests!" .any(|node| node.is_enabled()) { if self.permit_dc_failover { - warn!( + warn_rate_limited!( + SANITY_CHECKS_WARNING_INTERVAL, "\ -All nodes in the preferred datacenter (\"{preferred_dc}\") are disabled by the HostFilter! \ -Datacenter failover is enabled, so the request will be always sent to remote DCs. \ -This is most likely a misconfiguration!" + All nodes in the preferred datacenter (\"{preferred_dc}\") are disabled by the HostFilter! \ + Datacenter failover is enabled, so the request will be always sent to remote DCs. \ + This is most likely a misconfiguration!" ); } else { - warn!( + warn_rate_limited!( + SANITY_CHECKS_WARNING_INTERVAL, "\ All nodes in the preferred datacenter (\"{preferred_dc}\") are disabled by the HostFilter! \ Datacenter failover is disabled, so the query plans will be empty! \ From 1458c5ab3cf2c33da39214c55b4c16ebdd15ff82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wojciech=20Przytu=C5=82a?= Date: Sun, 8 Jun 2025 20:06:47 +0200 Subject: [PATCH 6/6] LBPs: rate limit various warnings There were some more warnings that were not rate-limited, which could flood the logs in case of misconfiguration. This commit adds rate limiting there, too. --- scylla/src/policies/load_balancing/default.rs | 7 ++++--- scylla/src/policies/load_balancing/single_target.rs | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scylla/src/policies/load_balancing/default.rs b/scylla/src/policies/load_balancing/default.rs index 72626c9bfd..bb2cea0fc8 100644 --- a/scylla/src/policies/load_balancing/default.rs +++ b/scylla/src/policies/load_balancing/default.rs @@ -20,6 +20,8 @@ use std::{fmt, sync::Arc, time::Duration}; use tracing::debug; use uuid::Uuid; +const SANITY_CHECKS_WARNING_INTERVAL: std::time::Duration = std::time::Duration::from_secs(1); + #[derive(Clone, Copy)] enum NodeLocationCriteria<'a> { Any, @@ -591,8 +593,6 @@ impl DefaultPolicy { /// Checks for misconfiguration and warns if any is discovered. fn pick_sanity_checks(&self, routing_info: &ProcessedRoutingInfo, cluster: &ClusterState) { - const SANITY_CHECKS_WARNING_INTERVAL: std::time::Duration = - std::time::Duration::from_secs(1); if let Some(preferred_dc) = self.preferences.datacenter() { // Preferred DC + no datacenter failover + SimpleStrategy is an anti-pattern. if let Some(ref token_with_strategy) = routing_info.token_with_strategy { @@ -677,7 +677,8 @@ You won't be able to execute any requests! 
This is most likely a misconfiguratio { nodes } else { - tracing::warn!( + warn_rate_limited!( + SANITY_CHECKS_WARNING_INTERVAL, "Datacenter specified as the preferred one ({}) does not exist!", preferred_datacenter ); diff --git a/scylla/src/policies/load_balancing/single_target.rs b/scylla/src/policies/load_balancing/single_target.rs index f9e770722c..3470c437cd 100644 --- a/scylla/src/policies/load_balancing/single_target.rs +++ b/scylla/src/policies/load_balancing/single_target.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use uuid::Uuid; use crate::cluster::{ClusterState, Node, NodeRef}; +use crate::observability::warn_rate_limited; use crate::routing::Shard; use super::{LoadBalancingPolicy, RoutingInfo}; @@ -69,7 +70,8 @@ impl LoadBalancingPolicy for SingleTargetLoadBalancingPolicy { match node { Some(node) => Some((node, self.shard)), None => { - tracing::warn!( + warn_rate_limited!( + std::time::Duration::from_secs(1), "SingleTargetLoadBalancingPolicy failed to find requested node {:?} in cluster metadata.", self.node_identifier );