From 7db11683b25fbdac3f31c2c2123a12dd0845588d Mon Sep 17 00:00:00 2001 From: RJ Burnham Date: Wed, 16 Jul 2025 13:16:38 +0100 Subject: [PATCH] feat: add return_stale_on_timeout parameter for stale-while-revalidate caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new `return_stale_on_timeout` parameter that enables returning stale cached values when `wait_for_calc_timeout` expires, instead of triggering a new calculation. This implements a stale-while-revalidate pattern that keeps applications responsive while ensuring background cache refresh. Key changes: - Add return_stale_on_timeout parameter to all cache backends - Modify core logic to return stale values on RecalculationNeeded exception - Fix memory core timeout handling to properly check wait_for_calc_timeout - Add comprehensive test suite with 7 test cases - Update documentation and maintain backward compatibility When enabled, the behavior follows this pattern: 1. Fresh values (≤ stale_after) return immediately 2. Stale values trigger background refresh 3. Caller waits up to wait_for_calc_timeout for refresh to complete 4. If the timeout expires, return the stale value instead of triggering a new calculation 5. 
Background refresh continues for next request Closes: Implements stale-while-revalidate caching pattern --- demo_return_stale_on_timeout.py | 151 +++++++++++++++++++ src/cachier/config.py | 8 +- src/cachier/core.py | 22 ++- src/cachier/cores/base.py | 2 + src/cachier/cores/memory.py | 26 +++- src/cachier/cores/mongo.py | 3 +- src/cachier/cores/pickle.py | 3 +- src/cachier/cores/redis.py | 3 +- src/cachier/cores/sql.py | 3 +- tests/test_return_stale_on_timeout.py | 209 ++++++++++++++++++++++++++ 10 files changed, 416 insertions(+), 14 deletions(-) create mode 100644 demo_return_stale_on_timeout.py create mode 100644 tests/test_return_stale_on_timeout.py diff --git a/demo_return_stale_on_timeout.py b/demo_return_stale_on_timeout.py new file mode 100644 index 0000000..da8adea --- /dev/null +++ b/demo_return_stale_on_timeout.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +"""Demonstration of the new return_stale_on_timeout feature.""" + +import time +import threading +from datetime import timedelta + +import cachier + + +def demo_return_stale_on_timeout(): + """Demonstrate the return_stale_on_timeout feature.""" + + print("šŸŽÆ Cachier return_stale_on_timeout Feature Demo") + print("=" * 50) + + @cachier.cachier( + backend="memory", + stale_after=timedelta(seconds=2), # Fresh for 2 seconds + wait_for_calc_timeout=3, # Wait up to 3 seconds for calculation + return_stale_on_timeout=True, # Return stale value if timeout + next_time=False, # Don't return stale immediately + ) + def expensive_api_call(query): + """Simulate an expensive API call that takes 5 seconds.""" + print(f" šŸ”„ Making expensive API call for '{query}'...") + time.sleep(5) # Simulates network request + return f"Result for {query}: {len(query)} chars" + + expensive_api_call.clear_cache() + + # 1. First call - will cache the result + print("\n1ļøāƒ£ First call (cold cache):") + result1 = expensive_api_call("hello world") + print(f" āœ… Got: {result1}") + + # 2. 
Second call while fresh - returns cached result immediately + print("\n2ļøāƒ£ Second call (fresh cache):") + start_time = time.time() + result2 = expensive_api_call("hello world") + elapsed = time.time() - start_time + print(f" āœ… Got: {result2} (took {elapsed:.2f}s)") + + # 3. Wait for cache to become stale + print("\nā° Waiting for cache to become stale (2+ seconds)...") + time.sleep(2.5) + + # 4. Start a background calculation + print("\n3ļøāƒ£ Starting background calculation...") + def background_refresh(): + expensive_api_call("hello world") + + thread = threading.Thread(target=background_refresh) + thread.start() + time.sleep(0.5) # Let background thread start + + # 5. This call will wait up to 3 seconds, then return stale value + print("\n4ļøāƒ£ Main call (should return stale value after 3s timeout):") + start_time = time.time() + result3 = expensive_api_call("hello world") + elapsed = time.time() - start_time + print(f" āœ… Got: {result3} (took {elapsed:.2f}s)") + + if elapsed < 4: + print(" šŸŽ‰ SUCCESS! 
Returned stale value instead of waiting 5 seconds!") + else: + print(" āŒ Something went wrong - took too long") + + # Wait for background thread to complete + thread.join() + + print("\nšŸ“‹ Summary:") + print(" • Fresh values returned immediately") + print(" • Stale values trigger background refresh") + print(" • If refresh takes too long, return stale value") + print(" • This keeps your application responsive!") + + +def demo_comparison(): + """Compare with and without return_stale_on_timeout.""" + + print("\n\nšŸ”„ Comparison Demo") + print("=" * 50) + + # Without return_stale_on_timeout (default behavior) + @cachier.cachier( + backend="memory", + stale_after=timedelta(seconds=1), + wait_for_calc_timeout=2, + return_stale_on_timeout=False, # Default + ) + def slow_func_old(x): + time.sleep(3) + return x * 2 + + # With return_stale_on_timeout + @cachier.cachier( + backend="memory", + stale_after=timedelta(seconds=1), + wait_for_calc_timeout=2, + return_stale_on_timeout=True, # New feature + ) + def slow_func_new(x): + time.sleep(3) + return x * 2 + + slow_func_old.clear_cache() + slow_func_new.clear_cache() + + # Cache initial values + print("Caching initial values...") + slow_func_old(10) + slow_func_new(10) + + # Wait for stale + time.sleep(1.5) + + # Start background calculations + def bg_old(): + slow_func_old(10) + def bg_new(): + slow_func_new(10) + + threading.Thread(target=bg_old).start() + threading.Thread(target=bg_new).start() + time.sleep(0.5) + + print("\nTesting behavior when calculation times out:") + + # Test old behavior + print("šŸ“Š OLD behavior (return_stale_on_timeout=False):") + start = time.time() + result_old = slow_func_old(10) # Will wait, then start new calculation + elapsed_old = time.time() - start + print(f" Result: {result_old}, Time: {elapsed_old:.2f}s") + + time.sleep(0.5) # Brief pause + + # Test new behavior + print("šŸ†• NEW behavior (return_stale_on_timeout=True):") + start = time.time() + result_new = slow_func_new(10) # 
Will return stale value after timeout + elapsed_new = time.time() - start + print(f" Result: {result_new}, Time: {elapsed_new:.2f}s") + + print(f"\nšŸ† Time saved: {elapsed_old - elapsed_new:.2f} seconds!") + + +if __name__ == "__main__": + demo_return_stale_on_timeout() + demo_comparison() \ No newline at end of file diff --git a/src/cachier/config.py b/src/cachier/config.py index 53dfbe8..efb8bdd 100644 --- a/src/cachier/config.py +++ b/src/cachier/config.py @@ -62,6 +62,7 @@ class Params: pickle_reload: bool = True separate_files: bool = False wait_for_calc_timeout: int = 0 + return_stale_on_timeout: bool = False allow_none: bool = False cleanup_stale: bool = False cleanup_interval: timedelta = timedelta(days=1) @@ -118,9 +119,10 @@ def set_global_params(**params: Any) -> None: Parameters given directly to a decorator take precedence over any values set by this function. - Only 'stale_after', 'next_time', and 'wait_for_calc_timeout' can be changed - after the memoization decorator has been applied. Other parameters will - only have an effect on decorators applied after this function is run. + Only 'stale_after', 'next_time', 'wait_for_calc_timeout', and + 'return_stale_on_timeout' can be changed after the memoization decorator + has been applied. Other parameters will only have an effect on decorators + applied after this function is run. 
""" import cachier diff --git a/src/cachier/core.py b/src/cachier/core.py index 4db5e32..7a97694 100644 --- a/src/cachier/core.py +++ b/src/cachier/core.py @@ -120,6 +120,7 @@ def cachier( pickle_reload: Optional[bool] = None, separate_files: Optional[bool] = None, wait_for_calc_timeout: Optional[int] = None, + return_stale_on_timeout: Optional[bool] = None, allow_none: Optional[bool] = None, cleanup_stale: Optional[bool] = None, cleanup_interval: Optional[timedelta] = None, @@ -177,12 +178,16 @@ def cachier( Instead of a single cache file per-function, each function's cache is split between several files, one for each argument set. This can help if you per-function cache files become too large. - wait_for_calc_timeout: int, optional, for MongoDB only + wait_for_calc_timeout: int, optional The maximum time to wait for an ongoing calculation. When a process started to calculate the value setting being_calculated to True, any process trying to read the same entry will wait a maximum of seconds specified in this parameter. 0 means wait forever. Once the timeout expires the calculation will be triggered. + return_stale_on_timeout: bool, optional + If True, when wait_for_calc_timeout expires, return the existing stale + value instead of triggering a new calculation. Only applies when there + is a stale value available. Defaults to False. allow_none: bool, optional Allows storing None values in the cache. If False, functions returning None will not be cached and are recalculated every call. 
@@ -215,28 +220,32 @@ def cachier( cache_dir=cache_dir, separate_files=separate_files, wait_for_calc_timeout=wait_for_calc_timeout, + return_stale_on_timeout=return_stale_on_timeout, ) elif backend == "mongo": core = _MongoCore( hash_func=hash_func, mongetter=mongetter, wait_for_calc_timeout=wait_for_calc_timeout, + return_stale_on_timeout=return_stale_on_timeout, ) elif backend == "memory": core = _MemoryCore( - hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout + hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout, return_stale_on_timeout=return_stale_on_timeout ) elif backend == "sql": core = _SQLCore( hash_func=hash_func, sql_engine=sql_engine, wait_for_calc_timeout=wait_for_calc_timeout, + return_stale_on_timeout=return_stale_on_timeout, ) elif backend == "redis": core = _RedisCore( hash_func=hash_func, redis_client=redis_client, wait_for_calc_timeout=wait_for_calc_timeout, + return_stale_on_timeout=return_stale_on_timeout, ) else: raise ValueError("specified an invalid core: %s" % backend) @@ -291,6 +300,9 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds): stale_after, "stale_after", kwds ) _next_time = _update_with_defaults(next_time, "next_time", kwds) + _return_stale_on_timeout = _update_with_defaults( + return_stale_on_timeout, "return_stale_on_timeout", kwds + ) _cleanup_flag = _update_with_defaults( cleanup_stale, "cleanup_stale", kwds ) @@ -362,6 +374,9 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds): try: return core.wait_on_entry_calc(key) except RecalculationNeeded: + if _return_stale_on_timeout and entry and entry.value is not None: + _print("Timeout reached, returning stale value.") + return entry.value return _calc_entry(core, key, func, args, kwds) if _next_time: _print("Async calc and return stale") @@ -380,6 +395,9 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds): try: return core.wait_on_entry_calc(key) except RecalculationNeeded: + if _return_stale_on_timeout and 
entry and entry.value is not None: + _print("Timeout reached, returning stale value.") + return entry.value return _calc_entry(core, key, func, args, kwds) _print("No entry found. No current calc. Calling like a boss.") return _calc_entry(core, key, func, args, kwds) diff --git a/src/cachier/cores/base.py b/src/cachier/cores/base.py index edb8e7e..63d05ed 100644 --- a/src/cachier/cores/base.py +++ b/src/cachier/cores/base.py @@ -34,9 +34,11 @@ def __init__( self, hash_func: Optional[HashFunc], wait_for_calc_timeout: Optional[int], + return_stale_on_timeout: Optional[bool] = None, ): self.hash_func = _update_with_defaults(hash_func, "hash_func") self.wait_for_calc_timeout = wait_for_calc_timeout + self.return_stale_on_timeout = return_stale_on_timeout self.lock = threading.RLock() def set_func(self, func): diff --git a/src/cachier/cores/memory.py b/src/cachier/cores/memory.py index ddd0acd..3ac9b9e 100644 --- a/src/cachier/cores/memory.py +++ b/src/cachier/cores/memory.py @@ -1,6 +1,7 @@ """A memory-based caching core for cachier.""" import threading +import time from datetime import datetime, timedelta from typing import Any, Dict, Optional, Tuple @@ -16,8 +17,9 @@ def __init__( self, hash_func: Optional[HashFunc], wait_for_calc_timeout: Optional[int], + return_stale_on_timeout: Optional[bool] = None, ): - super().__init__(hash_func, wait_for_calc_timeout) + super().__init__(hash_func, wait_for_calc_timeout, return_stale_on_timeout) self.cache: Dict[str, CacheEntry] = {} def _hash_func_key(self, key: str) -> str: @@ -89,10 +91,24 @@ def wait_on_entry_calc(self, key: str) -> Any: return entry.value if entry._condition is None: raise RuntimeError("No condition set for entry") - entry._condition.acquire() - entry._condition.wait() - entry._condition.release() - return self.cache[hash_key].value + + # Wait with timeout checking similar to other cores + time_spent = 0 + while True: + entry._condition.acquire() + # Wait for 1 second at a time to allow timeout checking + 
signaled = entry._condition.wait(timeout=1.0) + entry._condition.release() + + # Check if the calculation completed + with self.lock: + if hash_key in self.cache and not self.cache[hash_key]._processing: + return self.cache[hash_key].value + + # If we weren't signaled and the entry is still processing, check timeout + if not signaled: + time_spent += 1 + self.check_calc_timeout(time_spent) def clear_cache(self) -> None: with self.lock: diff --git a/src/cachier/cores/mongo.py b/src/cachier/cores/mongo.py index fbc9371..11cf8a5 100644 --- a/src/cachier/cores/mongo.py +++ b/src/cachier/cores/mongo.py @@ -40,6 +40,7 @@ def __init__( hash_func: Optional[HashFunc], mongetter: Optional[Mongetter], wait_for_calc_timeout: Optional[int], + return_stale_on_timeout: Optional[bool] = None, ): if "pymongo" not in sys.modules: warnings.warn( @@ -49,7 +50,7 @@ def __init__( ) # pragma: no cover super().__init__( - hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout + hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout, return_stale_on_timeout=return_stale_on_timeout ) if mongetter is None: raise MissingMongetter( diff --git a/src/cachier/cores/pickle.py b/src/cachier/cores/pickle.py index 344fcba..be202e0 100644 --- a/src/cachier/cores/pickle.py +++ b/src/cachier/cores/pickle.py @@ -78,8 +78,9 @@ def __init__( cache_dir: Optional[Union[str, os.PathLike]], separate_files: Optional[bool], wait_for_calc_timeout: Optional[int], + return_stale_on_timeout: Optional[bool] = None, ): - super().__init__(hash_func, wait_for_calc_timeout) + super().__init__(hash_func, wait_for_calc_timeout, return_stale_on_timeout) self._cache_dict: Dict[str, CacheEntry] = {} self.reload = _update_with_defaults(pickle_reload, "pickle_reload") self.cache_dir = os.path.expanduser( diff --git a/src/cachier/cores/redis.py b/src/cachier/cores/redis.py index ccd0ffe..4170a0c 100644 --- a/src/cachier/cores/redis.py +++ b/src/cachier/cores/redis.py @@ -34,6 +34,7 @@ def __init__( 
Union["redis.Redis", Callable[[], "redis.Redis"]] ], wait_for_calc_timeout: Optional[int] = None, + return_stale_on_timeout: Optional[bool] = None, key_prefix: str = "cachier", ): if not REDIS_AVAILABLE: @@ -45,7 +46,7 @@ def __init__( ) super().__init__( - hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout + hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout, return_stale_on_timeout=return_stale_on_timeout ) if redis_client is None: raise MissingRedisClient( diff --git a/src/cachier/cores/sql.py b/src/cachier/cores/sql.py index 543531e..871ee73 100644 --- a/src/cachier/cores/sql.py +++ b/src/cachier/cores/sql.py @@ -63,6 +63,7 @@ def __init__( hash_func: Optional[HashFunc], sql_engine: Optional[Union[str, "Engine", Callable[[], "Engine"]]], wait_for_calc_timeout: Optional[int] = None, + return_stale_on_timeout: Optional[bool] = None, ): if not SQLALCHEMY_AVAILABLE: raise ImportError( @@ -70,7 +71,7 @@ def __init__( "Install with `pip install SQLAlchemy`." ) super().__init__( - hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout + hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout, return_stale_on_timeout=return_stale_on_timeout ) self._engine = self._resolve_engine(sql_engine) self._Session = sessionmaker(bind=self._engine) diff --git a/tests/test_return_stale_on_timeout.py b/tests/test_return_stale_on_timeout.py new file mode 100644 index 0000000..4c26876 --- /dev/null +++ b/tests/test_return_stale_on_timeout.py @@ -0,0 +1,209 @@ +"""Test return_stale_on_timeout functionality.""" + +import time +import threading +import queue +from datetime import timedelta + +import pytest + +import cachier + + +@pytest.mark.parametrize("backend", ["memory", "pickle"]) +def test_return_stale_on_timeout_true(backend): + """Test that stale values are returned when timeout expires and return_stale_on_timeout=True.""" + + @cachier.cachier( + backend=backend, + stale_after=timedelta(seconds=1), + wait_for_calc_timeout=2, + 
return_stale_on_timeout=True, + next_time=False, + ) + def slow_function(x): + time.sleep(3) # Longer than wait_for_calc_timeout + return x * 2 + + slow_function.clear_cache() + + # First call - will be cached + result1 = slow_function(5) + assert result1 == 10 + + # Wait for value to become stale + time.sleep(1.5) + + # Start a background thread that will trigger recalculation + def background_call(result_queue): + result = slow_function(5) + result_queue.put(result) + + result_queue = queue.Queue() + thread1 = threading.Thread(target=background_call, args=(result_queue,)) + thread1.start() + + # Give thread1 time to start the calculation + time.sleep(0.5) + + # This call should timeout waiting for the calculation and return stale value + start_time = time.time() + result2 = slow_function(5) + elapsed_time = time.time() - start_time + + # Should return quickly with stale value, not wait for full calculation + assert elapsed_time < 2.5 # Less than wait_for_calc_timeout + some buffer + assert result2 == 10 # Should return the stale value + + # Clean up + thread1.join(timeout=5) + if not result_queue.empty(): + result_queue.get() + + +@pytest.mark.parametrize("backend", ["memory", "pickle"]) +def test_return_stale_on_timeout_false(backend): + """Test that new calculation is triggered when timeout expires and return_stale_on_timeout=False.""" + + @cachier.cachier( + backend=backend, + stale_after=timedelta(seconds=1), + wait_for_calc_timeout=2, + return_stale_on_timeout=False, # Default behavior + next_time=False, + ) + def slow_function(x): + time.sleep(3) # Longer than wait_for_calc_timeout + return x * 3 # Different multiplier to distinguish results + + slow_function.clear_cache() + + # First call - will be cached + result1 = slow_function(5) + assert result1 == 15 + + # Wait for value to become stale + time.sleep(1.5) + + # Start a background thread that will trigger recalculation + def background_call(result_queue): + result = slow_function(5) + 
result_queue.put(result) + + result_queue = queue.Queue() + thread1 = threading.Thread(target=background_call, args=(result_queue,)) + thread1.start() + + # Give thread1 time to start the calculation + time.sleep(0.5) + + # This call should timeout waiting for the calculation and trigger a new calculation + start_time = time.time() + result2 = slow_function(5) + elapsed_time = time.time() - start_time + + # Should take about as long as the function execution time + assert elapsed_time >= 2.5 # At least close to the function execution time + assert result2 == 15 # Should return the newly calculated value + + # Clean up + thread1.join(timeout=8) + if not result_queue.empty(): + result_queue.get() + + +@pytest.mark.parametrize("backend", ["memory", "pickle"]) +def test_return_stale_on_timeout_no_stale_value(backend): + """Test that new calculation is triggered when no stale value exists, regardless of return_stale_on_timeout.""" + + @cachier.cachier( + backend=backend, + wait_for_calc_timeout=2, + return_stale_on_timeout=True, + next_time=False, + ) + def slow_function(x): + time.sleep(3) # Longer than wait_for_calc_timeout + return x * 4 + + slow_function.clear_cache() + + # Start two threads simultaneously - no cached value exists + def background_call(result_queue, thread_id): + result = slow_function(5) + result_queue.put((thread_id, result)) + + result_queue = queue.Queue() + thread1 = threading.Thread(target=background_call, args=(result_queue, 1)) + thread2 = threading.Thread(target=background_call, args=(result_queue, 2)) + + thread1.start() + time.sleep(0.1) # Small delay to ensure thread1 starts first + thread2.start() + + # Wait for both threads to complete + thread1.join(timeout=8) + thread2.join(timeout=8) + + # Should get results from both threads + assert result_queue.qsize() == 2 + results = [] + while not result_queue.empty(): + thread_id, result = result_queue.get() + results.append(result) + + # Both should have calculated the value (one calculated, 
one waited or recalculated) + assert all(result == 20 for result in results) + + +def test_return_stale_on_timeout_global_config(): + """Test that return_stale_on_timeout can be set globally.""" + + # Set global configuration + cachier.set_global_params( + wait_for_calc_timeout=2, + return_stale_on_timeout=True + ) + + @cachier.cachier( + backend="memory", + stale_after=timedelta(seconds=1), + next_time=False, + ) + def slow_function(x): + time.sleep(3) + return x * 5 + + slow_function.clear_cache() + + # First call - will be cached + result1 = slow_function(3) + assert result1 == 15 + + # Wait for value to become stale + time.sleep(1.5) + + # Start background calculation + def background_call(): + slow_function(3) + + thread1 = threading.Thread(target=background_call) + thread1.start() + time.sleep(0.5) # Let background calculation start + + # This should return stale value due to global configuration + start_time = time.time() + result2 = slow_function(3) + elapsed_time = time.time() - start_time + + assert elapsed_time < 2.5 # Should return quickly + assert result2 == 15 # Should return stale value + + # Clean up + thread1.join(timeout=5) + + # Reset global configuration + cachier.set_global_params( + wait_for_calc_timeout=0, + return_stale_on_timeout=False + ) \ No newline at end of file