From eba2bbd07416be302d3f6c7a9d6800c963aac7be Mon Sep 17 00:00:00 2001 From: Phil Haack Date: Tue, 16 Dec 2025 11:28:37 -0800 Subject: [PATCH 1/4] Add urllib3-based retry for feature flag requests Use urllib3's built-in Retry mechanism for feature flag POST requests instead of application-level retry logic. This is simpler and leverages well-tested library code. Key changes: - Add `RETRY_STATUS_FORCELIST` = [408, 500, 502, 503, 504] - Add `_build_flags_session()` with POST retries and `status_forcelist` - Update `flags()` to use dedicated flags session - Add tests for retry configuration and session usage The flags session retries on: - Network failures (connect/read errors) - Transient server errors (408, 500, 502, 503, 504) It does NOT retry on: - 429 (rate limit) - need to wait, not hammer - 402 (quota limit) - won't resolve with retries --- posthog/request.py | 61 ++++++++++++++++++++++++++++++---- posthog/test/test_request.py | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 6 deletions(-) diff --git a/posthog/request.py b/posthog/request.py index 138d621d..08211fe5 100644 --- a/posthog/request.py +++ b/posthog/request.py @@ -8,7 +8,6 @@ from io import BytesIO from typing import Any, List, Optional, Tuple, Union - import requests from dateutil.tz import tzutc from requests.adapters import HTTPAdapter # type: ignore[import-untyped] @@ -42,6 +41,9 @@ if hasattr(socket, attr): KEEP_ALIVE_SOCKET_OPTIONS.append((socket.SOL_TCP, getattr(socket, attr), value)) +# Status codes that indicate transient server errors worth retrying +RETRY_STATUS_FORCELIST = [408, 500, 502, 503, 504] + def _mask_tokens_in_url(url: str) -> str: """Mask token values in URLs for safe logging, keeping first 10 chars visible.""" @@ -71,20 +73,49 @@ def init_poolmanager(self, *args, **kwargs): def _build_session(socket_options: Optional[SocketOptions] = None) -> requests.Session: + """Build a session for general requests (batch, decide, etc.).""" + adapter = 
HTTPAdapterWithSocketOptions( + max_retries=Retry( + total=2, + connect=2, + read=2, + ), + socket_options=socket_options, + ) + session = requests.Session() + session.mount("https://", adapter) + return session + + +def _build_flags_session( + socket_options: Optional[SocketOptions] = None, +) -> requests.Session: + """ + Build a session for feature flag requests with POST retries. + + Feature flag requests are idempotent (read-only), so retrying POST + requests is safe. This session retries on transient server errors + (408, 5xx) and network failures with exponential backoff + (0.5s, 1s delays between retries). + """ adapter = HTTPAdapterWithSocketOptions( max_retries=Retry( total=2, connect=2, read=2, + backoff_factor=0.5, + status_forcelist=RETRY_STATUS_FORCELIST, + allowed_methods=["POST"], ), socket_options=socket_options, ) - session = requests.sessions.Session() + session = requests.Session() session.mount("https://", adapter) return session _session = _build_session() +_flags_session = _build_flags_session() _socket_options: Optional[SocketOptions] = None _pooling_enabled = True @@ -95,6 +126,12 @@ def _get_session() -> requests.Session: return _build_session(_socket_options) +def _get_flags_session() -> requests.Session: + if _pooling_enabled: + return _flags_session + return _build_flags_session(_socket_options) + + def set_socket_options(socket_options: Optional[SocketOptions]) -> None: """ Configure socket options for all HTTP connections. 
@@ -103,11 +140,12 @@ def set_socket_options(socket_options: Optional[SocketOptions]) -> None: from posthog import set_socket_options set_socket_options([(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]) """ - global _session, _socket_options + global _session, _flags_session, _socket_options if socket_options == _socket_options: return _socket_options = socket_options _session = _build_session(socket_options) + _flags_session = _build_flags_session(socket_options) def enable_keep_alive() -> None: @@ -145,6 +183,7 @@ def post( path=None, gzip: bool = False, timeout: int = 15, + session: Optional[requests.Session] = None, **kwargs, ) -> requests.Response: """Post the `kwargs` to the API""" @@ -165,7 +204,9 @@ def post( gz.write(data.encode("utf-8")) data = buf.getvalue() - res = _get_session().post(url, data=data, headers=headers, timeout=timeout) + res = (session or _get_session()).post( + url, data=data, headers=headers, timeout=timeout + ) if res.status_code == 200: log.debug("data uploaded successfully") @@ -221,8 +262,16 @@ def flags( timeout: int = 15, **kwargs, ) -> Any: - """Post the `kwargs to the flags API endpoint""" - res = post(api_key, host, "/flags/?v=2", gzip, timeout, **kwargs) + """Post the kwargs to the flags API endpoint with automatic retries.""" + res = post( + api_key, + host, + "/flags/?v=2", + gzip, + timeout, + session=_get_flags_session(), + **kwargs, + ) return _process_response( res, success_message="Feature flags evaluated successfully" ) diff --git a/posthog/test/test_request.py b/posthog/test/test_request.py index 128123fe..e1798096 100644 --- a/posthog/test/test_request.py +++ b/posthog/test/test_request.py @@ -19,6 +19,7 @@ determine_server_host, disable_connection_reuse, enable_keep_alive, + flags, get, set_socket_options, ) @@ -393,3 +394,66 @@ def test_set_socket_options_is_idempotent(): assert session1 is session2 finally: set_socket_options(None) + + +class TestFlagsSession(unittest.TestCase): + """Tests for flags session 
configuration.""" + + def test_retry_status_forcelist_excludes_rate_limits(self): + """Verify 429 (rate limit) is NOT retried - need to wait, not hammer.""" + from posthog.request import RETRY_STATUS_FORCELIST + + self.assertNotIn(429, RETRY_STATUS_FORCELIST) + + def test_retry_status_forcelist_excludes_quota_errors(self): + """Verify 402 (payment required/quota) is NOT retried - won't resolve.""" + from posthog.request import RETRY_STATUS_FORCELIST + + self.assertNotIn(402, RETRY_STATUS_FORCELIST) + + @mock.patch("posthog.request._get_flags_session") + def test_flags_uses_flags_session(self, mock_get_flags_session): + """flags() uses the dedicated flags session, not the general session.""" + mock_response = requests.Response() + mock_response.status_code = 200 + mock_response._content = json.dumps( + { + "featureFlags": {"test-flag": True}, + "featureFlagPayloads": {}, + "errorsWhileComputingFlags": False, + } + ).encode("utf-8") + + mock_session = mock.MagicMock() + mock_session.post.return_value = mock_response + mock_get_flags_session.return_value = mock_session + + result = flags("test-key", "https://test.posthog.com", distinct_id="user123") + + self.assertEqual(result["featureFlags"]["test-flag"], True) + mock_get_flags_session.assert_called_once() + mock_session.post.assert_called_once() + + @mock.patch("posthog.request._get_flags_session") + def test_flags_no_retry_on_quota_limit(self, mock_get_flags_session): + """flags() raises QuotaLimitError without retrying (at application level).""" + mock_response = requests.Response() + mock_response.status_code = 200 + mock_response._content = json.dumps( + { + "quotaLimited": ["feature_flags"], + "featureFlags": {}, + "featureFlagPayloads": {}, + "errorsWhileComputingFlags": False, + } + ).encode("utf-8") + + mock_session = mock.MagicMock() + mock_session.post.return_value = mock_response + mock_get_flags_session.return_value = mock_session + + with self.assertRaises(QuotaLimitError): + flags("test-key", 
"https://test.posthog.com", distinct_id="user123") + + # QuotaLimitError is raised after response is received, not retried + self.assertEqual(mock_session.post.call_count, 1) From 6e3ca1898c7492356bc0011cc2b5367d18f717eb Mon Sep 17 00:00:00 2001 From: Phil Haack Date: Tue, 16 Dec 2025 13:59:58 -0800 Subject: [PATCH 2/4] Make examples run without requiring personal api key --- example.py | 128 ++++++++++++++++++++++++++++------------------------- 1 file changed, 67 insertions(+), 61 deletions(-) diff --git a/example.py b/example.py index 31eb19f7..fbde49fd 100644 --- a/example.py +++ b/example.py @@ -35,54 +35,40 @@ def load_env_file(): personal_api_key = os.getenv("POSTHOG_PERSONAL_API_KEY", "") host = os.getenv("POSTHOG_HOST", "http://localhost:8000") -# Check if credentials are provided -if not project_key or not personal_api_key: - print("āŒ Missing PostHog credentials!") - print( - " Please set POSTHOG_PROJECT_API_KEY and POSTHOG_PERSONAL_API_KEY environment variables" - ) +# Check if project key is provided (required) +if not project_key: + print("āŒ Missing PostHog project API key!") + print(" Please set POSTHOG_PROJECT_API_KEY environment variable") print(" or copy .env.example to .env and fill in your values") exit(1) -# Test authentication before proceeding -print("šŸ”‘ Testing PostHog authentication...") +# Configure PostHog with credentials +posthog.debug = False +posthog.api_key = project_key +posthog.project_api_key = project_key +posthog.host = host +posthog.poll_interval = 10 -try: - # Configure PostHog with credentials - posthog.debug = False # Keep quiet during auth test - posthog.api_key = project_key - posthog.project_api_key = project_key +# Check if personal API key is available for local evaluation +local_eval_available = bool(personal_api_key) +if personal_api_key: posthog.personal_api_key = personal_api_key - posthog.host = host - posthog.poll_interval = 10 - - # Test by attempting to get feature flags (this validates both keys) - # This 
will fail if credentials are invalid - test_flags = posthog.get_all_flags("test_user", only_evaluate_locally=True) - - # If we get here without exception, credentials work - print("āœ… Authentication successful!") - print(f" Project API Key: {project_key[:9]}...") - print(" Personal API Key: [REDACTED]") - print(f" Host: {host}\n\n") - -except Exception as e: - print("āŒ Authentication failed!") - print(f" Error: {e}") - print("\n Please check your credentials:") - print(" - POSTHOG_PROJECT_API_KEY: Project API key from PostHog settings") - print( - " - POSTHOG_PERSONAL_API_KEY: Personal API key (required for local evaluation)" - ) - print(" - POSTHOG_HOST: Your PostHog instance URL") - exit(1) + +print("šŸ”‘ PostHog Configuration:") +print(f" Project API Key: {project_key[:9]}...") +if local_eval_available: + print(" Personal API Key: [SET]") +else: + print(" Personal API Key: [NOT SET] - Local evaluation examples will be skipped") +print(f" Host: {host}\n") # Display menu and get user choice print("šŸš€ PostHog Python SDK Demo - Choose an example to run:\n") print("1. Identify and capture examples") -print("2. Feature flag local evaluation examples") +local_eval_note = "" if local_eval_available else " [requires personal API key]" +print(f"2. Feature flag local evaluation examples{local_eval_note}") print("3. Feature flag payload examples") -print("4. Flag dependencies examples") +print(f"4. Flag dependencies examples{local_eval_note}") print("5. Context management and tagging examples") print("6. Run all examples") print("7. Exit") @@ -148,6 +134,14 @@ def load_env_file(): ) elif choice == "2": + if not local_eval_available: + print("\nāŒ This example requires a personal API key for local evaluation.") + print( + " Set POSTHOG_PERSONAL_API_KEY environment variable to run this example." 
+ ) + posthog.shutdown() + exit(1) + print("\n" + "=" * 60) print("FEATURE FLAG LOCAL EVALUATION EXAMPLES") print("=" * 60) @@ -215,6 +209,14 @@ def load_env_file(): print(f"Value (variant or enabled): {result.get_value()}") elif choice == "4": + if not local_eval_available: + print("\nāŒ This example requires a personal API key for local evaluation.") + print( + " Set POSTHOG_PERSONAL_API_KEY environment variable to run this example." + ) + posthog.shutdown() + exit(1) + print("\n" + "=" * 60) print("FLAG DEPENDENCIES EXAMPLES") print("=" * 60) @@ -429,6 +431,8 @@ def process_payment(payment_id): elif choice == "6": print("\nšŸ”„ Running all examples...") + if not local_eval_available: + print(" (Skipping local evaluation examples - no personal API key set)\n") # Run example 1 print(f"\n{'šŸ”ø' * 20} IDENTIFY AND CAPTURE {'šŸ”ø' * 20}") @@ -447,35 +451,37 @@ def process_payment(payment_id): distinct_id="new_distinct_id", properties={"email": "something@something.com"} ) - # Run example 2 - print(f"\n{'šŸ”ø' * 20} FEATURE FLAGS {'šŸ”ø' * 20}") - print("šŸ Testing basic feature flags...") - print(f"beta-feature: {posthog.feature_enabled('beta-feature', 'distinct_id')}") - print( - f"Sydney user: {posthog.feature_enabled('test-flag', 'random_id_12345', person_properties={'$geoip_city_name': 'Sydney'})}" - ) + # Run example 2 (requires local evaluation) + if local_eval_available: + print(f"\n{'šŸ”ø' * 20} FEATURE FLAGS {'šŸ”ø' * 20}") + print("šŸ Testing basic feature flags...") + print(f"beta-feature: {posthog.feature_enabled('beta-feature', 'distinct_id')}") + print( + f"Sydney user: {posthog.feature_enabled('test-flag', 'random_id_12345', person_properties={'$geoip_city_name': 'Sydney'})}" + ) # Run example 3 print(f"\n{'šŸ”ø' * 20} PAYLOADS {'šŸ”ø' * 20}") print("šŸ“¦ Testing payloads...") print(f"Payload: {posthog.get_feature_flag_payload('beta-feature', 'distinct_id')}") - # Run example 4 - print(f"\n{'šŸ”ø' * 20} FLAG DEPENDENCIES {'šŸ”ø' * 20}") - 
print("šŸ”— Testing flag dependencies...") - result1 = posthog.feature_enabled( - "test-flag-dependency", - "demo_user", - person_properties={"email": "user@example.com"}, - only_evaluate_locally=True, - ) - result2 = posthog.feature_enabled( - "test-flag-dependency", - "demo_user2", - person_properties={"email": "user@other.com"}, - only_evaluate_locally=True, - ) - print(f"āœ… @example.com user: {result1}, regular user: {result2}") + # Run example 4 (requires local evaluation) + if local_eval_available: + print(f"\n{'šŸ”ø' * 20} FLAG DEPENDENCIES {'šŸ”ø' * 20}") + print("šŸ”— Testing flag dependencies...") + result1 = posthog.feature_enabled( + "test-flag-dependency", + "demo_user", + person_properties={"email": "user@example.com"}, + only_evaluate_locally=True, + ) + result2 = posthog.feature_enabled( + "test-flag-dependency", + "demo_user2", + person_properties={"email": "user@other.com"}, + only_evaluate_locally=True, + ) + print(f"āœ… @example.com user: {result1}, regular user: {result2}") # Run example 5 print(f"\n{'šŸ”ø' * 20} CONTEXT MANAGEMENT {'šŸ”ø' * 20}") From 64adeece6ab3deb3cdda0145897f3ed6ed3651a3 Mon Sep 17 00:00:00 2001 From: Phil Haack Date: Tue, 16 Dec 2025 15:06:42 -0800 Subject: [PATCH 3/4] Add integration tests for network retry behavior Add tests that verify actual retry behavior, not just configuration: - test_retries_on_503_then_succeeds: Spins up a local HTTP server that returns 503 twice then 200, verifying 3 requests are made - test_connection_errors_are_retried: Verifies connection errors trigger retries by measuring elapsed time with backoff Both tests use dynamically allocated ports for CI safety. 
--- posthog/test/test_request.py | 207 +++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) diff --git a/posthog/test/test_request.py b/posthog/test/test_request.py index e1798096..f1671973 100644 --- a/posthog/test/test_request.py +++ b/posthog/test/test_request.py @@ -457,3 +457,210 @@ def test_flags_no_retry_on_quota_limit(self, mock_get_flags_session): # QuotaLimitError is raised after response is received, not retried self.assertEqual(mock_session.post.call_count, 1) + + +class TestFlagsSessionNetworkRetries(unittest.TestCase): + """Tests for network failure retries in the flags session.""" + + def test_flags_session_retry_config_includes_connection_errors(self): + """ + Verify that the flags session is configured to retry on connection errors. + + The urllib3 Retry adapter with connect=2 and read=2 automatically + retries on network-level failures (DNS failures, connection refused, + connection reset, etc.) up to 2 times each. + """ + from posthog.request import _build_flags_session + + session = _build_flags_session() + + # Get the adapter for https:// + adapter = session.get_adapter("https://test.posthog.com") + + # Verify retry configuration + retry = adapter.max_retries + self.assertEqual(retry.total, 2, "Should have 2 total retries") + self.assertEqual(retry.connect, 2, "Should retry connection errors twice") + self.assertEqual(retry.read, 2, "Should retry read errors twice") + self.assertIn("POST", retry.allowed_methods, "Should allow POST retries") + + def test_flags_session_retries_on_server_errors(self): + """ + Verify that transient server errors (5xx) trigger retries. + + This tests the status_forcelist configuration which specifies + which HTTP status codes should trigger a retry. 
+ """ + from posthog.request import _build_flags_session, RETRY_STATUS_FORCELIST + + session = _build_flags_session() + adapter = session.get_adapter("https://test.posthog.com") + retry = adapter.max_retries + + # Verify the status codes that trigger retries + self.assertEqual( + set(retry.status_forcelist), + set(RETRY_STATUS_FORCELIST), + "Should retry on transient server errors", + ) + + # Verify specific codes are included + self.assertIn(500, retry.status_forcelist) + self.assertIn(502, retry.status_forcelist) + self.assertIn(503, retry.status_forcelist) + self.assertIn(504, retry.status_forcelist) + + # Verify rate limits and quota errors are NOT retried + self.assertNotIn(429, retry.status_forcelist) + self.assertNotIn(402, retry.status_forcelist) + + def test_flags_session_has_backoff(self): + """ + Verify that retries use exponential backoff to avoid thundering herd. + """ + from posthog.request import _build_flags_session + + session = _build_flags_session() + adapter = session.get_adapter("https://test.posthog.com") + retry = adapter.max_retries + + self.assertEqual( + retry.backoff_factor, + 0.5, + "Should use 0.5s backoff factor (0.5s, 1s delays)", + ) + + +class TestFlagsSessionRetryIntegration(unittest.TestCase): + """Integration tests that verify actual retry behavior with a local server.""" + + def test_retries_on_503_then_succeeds(self): + """ + Verify that 503 errors trigger retries and eventually succeed. + + Uses a local HTTP server that fails twice with 503, then succeeds. + This tests the full retry flow including backoff timing. 
+ """ + import threading + from http.server import HTTPServer, BaseHTTPRequestHandler + from socketserver import ThreadingMixIn + from urllib3.util.retry import Retry + from posthog.request import HTTPAdapterWithSocketOptions, RETRY_STATUS_FORCELIST + + request_count = 0 + + class RetryTestHandler(BaseHTTPRequestHandler): + protocol_version = "HTTP/1.1" + + def do_POST(self): + nonlocal request_count + request_count += 1 + + # Read and discard request body to prevent connection issues + content_length = int(self.headers.get("Content-Length", 0)) + if content_length > 0: + self.rfile.read(content_length) + + if request_count <= 2: + self.send_response(503) + self.send_header("Content-Type", "application/json") + body = b'{"error": "Service unavailable"}' + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + else: + self.send_response(200) + self.send_header("Content-Type", "application/json") + body = ( + b'{"featureFlags": {"test": true}, "featureFlagPayloads": {}}' + ) + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, format, *args): + pass # Suppress logging + + # Use ThreadingMixIn for cleaner shutdown + class ThreadedHTTPServer(ThreadingMixIn, HTTPServer): + daemon_threads = True + + # Start server on a random available port + server = ThreadedHTTPServer(("127.0.0.1", 0), RetryTestHandler) + port = server.server_address[1] + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + + try: + # Build session with same retry config as _build_flags_session + # but mounted on http:// for local testing + adapter = HTTPAdapterWithSocketOptions( + max_retries=Retry( + total=2, + connect=2, + read=2, + backoff_factor=0.01, # Fast backoff for testing + status_forcelist=RETRY_STATUS_FORCELIST, + allowed_methods=["POST"], + ), + ) + session = requests.Session() + session.mount("http://", 
adapter) + + response = session.post( + f"http://127.0.0.1:{port}/flags/?v=2", + json={"distinct_id": "user123"}, + timeout=5, + ) + + # Should succeed on 3rd attempt + self.assertEqual(response.status_code, 200) + self.assertEqual(request_count, 3) # 1 initial + 2 retries + finally: + server.shutdown() + server.server_close() + + def test_connection_errors_are_retried(self): + """ + Verify that connection errors (no server) trigger retries. + + Binds a socket to get a guaranteed available port, then closes it + so connection attempts fail with ConnectionError. + """ + import socket + import time + from urllib3.util.retry import Retry + from posthog.request import HTTPAdapterWithSocketOptions, RETRY_STATUS_FORCELIST + + # Get an available port by binding then closing a socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(("127.0.0.1", 0)) + port = sock.getsockname()[1] + sock.close() # Port is now available but nothing is listening + + adapter = HTTPAdapterWithSocketOptions( + max_retries=Retry( + total=2, + connect=2, + read=2, + backoff_factor=0.05, # Very fast for testing + status_forcelist=RETRY_STATUS_FORCELIST, + allowed_methods=["POST"], + ), + ) + session = requests.Session() + session.mount("http://", adapter) + + start = time.time() + with self.assertRaises(requests.exceptions.ConnectionError): + session.post( + f"http://127.0.0.1:{port}/flags/?v=2", + json={"distinct_id": "user123"}, + timeout=1, + ) + elapsed = time.time() - start + + # With 3 attempts and backoff, should take more than instant + # but less than timeout (confirms retries happened) + self.assertGreater(elapsed, 0.05, "Should have some delay from retries") From 7bc05f10028a93f2f49cb91da57e9801465674cd Mon Sep 17 00:00:00 2001 From: Phil Haack Date: Tue, 16 Dec 2025 15:20:26 -0800 Subject: [PATCH 4/4] Bump version to 7.4.0 --- CHANGELOG.md | 11 +++++++++++ posthog/version.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index f05417fc..9d228159 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +# 7.4.0 - 2025-12-16 + +feat: Add automatic retries for feature flag requests + +Feature flag API requests now automatically retry on transient failures: +- Network errors (connection refused, DNS failures, timeouts) +- Transient HTTP errors (408, 500, 502, 503, 504) +- Up to 2 retries with exponential backoff (first retry is immediate, second waits 1s) + +Rate limit (429) and quota (402) errors are not retried. + # 7.3.1 - 2025-12-06 fix: remove unused $exception_message and $exception_type diff --git a/posthog/version.py b/posthog/version.py index 492e6667..3de0587a 100644 --- a/posthog/version.py +++ b/posthog/version.py @@ -1,4 +1,4 @@ -VERSION = "7.3.1" +VERSION = "7.4.0" if __name__ == "__main__": print(VERSION, end="") # noqa: T201