 from aimon.reprompting_api.config import RepromptingConfig, StopReasons
 from aimon.reprompting_api.telemetry import TelemetryLogger
 from aimon.reprompting_api.reprompter import Reprompter
-from aimon.reprompting_api.utils import toxicity_check, get_failed_instructions_count, get_failed_instructions, get_residual_error_score, get_failed_toxicity_instructions
+from aimon.reprompting_api.utils import retry, toxicity_check, get_failed_instructions_count, get_failed_instructions, get_residual_error_score, get_failed_toxicity_instructions
 from aimon import Detect
 import time
 import random
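
Note: the `retry` helper imported here is not shown in this diff. Judging only from the keyword arguments used at the call sites below (`exception_to_check`, `tries`, `delay`, `backoff`, `logger`), it is presumably a conventional exponential-backoff decorator roughly along these lines; this is a sketch of the assumed behavior, not the actual `aimon.reprompting_api.utils` implementation.

import functools
import random
import time

def retry(exception_to_check=Exception, tries=3, delay=1, backoff=2, logger=None):
    """Assumed behavior: retry the wrapped callable with exponential backoff,
    re-raising the last exception once all attempts are exhausted."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            for attempt in range(1, tries + 1):
                try:
                    return func(*args, **kwargs)
                except exception_to_check as e:
                    if logger:
                        logger.warning(f"Attempt {attempt}/{tries} failed: {e}")
                    if attempt == tries:
                        raise  # attempts exhausted: re-raise the last error
                    time.sleep(wait + random.uniform(0, 0.1))  # backoff with small jitter
                    wait *= backoff
        return wrapper
    return decorator
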
@@ -243,57 +243,50 @@ def _build_aimon_payload(self, context, user_query, user_instructions, generated
         }
         return payload
 
-    def _call_llm(self, prompt_template: Template, max_attempts, system_prompt=None, context=None, user_query=None, base_delay=1):
+    def _call_llm(self, prompt_template: Template, max_attempts, system_prompt=None, context=None, user_query=None):
         """
         Calls the LLM with exponential backoff. Retries if the LLM call fails
-        OR returns a non-string value. Raises an exception if all retries fail.
+        OR returns a non-string value. If all retries fail, the last encountered
+        exception from the LLM call is re-raised.
 
         Args:
             prompt_template (Template): Prompt template for the LLM.
             max_attempts (int): Max retry attempts.
-            base_delay (float): Initial delay in seconds before backoff.
-
+
         Returns:
             str: LLM response text.
 
         Raises:
-            RuntimeError: If the LLM call fails or returns an invalid type after all retries.
+            RuntimeError: If the LLM call repeatedly fails, re-raises the last encountered error.
+            TypeError: If the LLM call fails to return a string.
         """
-        last_exception = None
-        for attempt in range(max_attempts):
-            try:
-                logger.debug(f"LLM call attempt {attempt + 1} with prompt template.")
-                result = self.llm_fn(prompt_template, system_prompt, context, user_query)
-                # Validate type
-                if not isinstance(result, str):
-                    raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
-                return result
-            except Exception as e:
-                last_exception = e
-                logger.warning(f"LLM call failed on attempt {attempt + 1}: {e}")
-                wait_time = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
-                time.sleep(wait_time)
-        raise RuntimeError(f"LLM call failed or returned invalid type after maximum retries. Last error: {last_exception}")
+        @retry(exception_to_check=Exception, tries=max_attempts, delay=1, backoff=2, logger=logger)
+        def backoff_call():
+            result = self.llm_fn(prompt_template, system_prompt, context, user_query)
+            if not isinstance(result, str):
+                raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
+            return result
+        return backoff_call()
 
-    def _detect_aimon_response(self, payload, max_attempts, base_delay=1):
+    def _detect_aimon_response(self, payload, max_attempts):
         """
         Calls AIMon Detect with exponential backoff and returns the detection result.
 
         This method wraps the AIMon evaluation call, retrying if it fails due to transient
         errors (e.g., network issues or temporary service unavailability). It retries up to
-        `max_attempts` times with exponential backoff before raising a RuntimeError.
+        `max_attempts` times with exponential backoff before raising the last encountered
+        exception from the AIMon Detect call.
 
         Args:
             payload (dict): A dictionary containing 'context', 'user_query',
                 'instructions', and 'generated_text' for evaluation.
             max_attempts (int): Maximum number of retry attempts.
-            base_delay (float): Initial delay in seconds before exponential backoff.
 
         Returns:
             object: The AIMon detection result containing evaluation scores and feedback.
 
         Raises:
-            RuntimeError: If AIMon Detect fails after all retry attempts.
+            RuntimeError: If AIMon Detect fails after all retry attempts, re-raises the last encountered error.
         """
         aimon_context = f"{payload['context']}\n\nUser Query:\n{payload['user_query']}"
         aimon_query = f"{payload['user_query']}\n\nInstructions:\n{payload['instructions']}"
@@ -302,21 +295,23 @@ def _detect_aimon_response(self, payload, max_attempts, base_delay=1):
         def run_detection(query, instructions, generated_text, context):
             return query, instructions, generated_text, context
 
-        for attempt in range(max_attempts):
-            try:
-                logger.debug(f"AIMon detect attempt {attempt + 1} with payload: {payload}")
-                _, _, _, _, result = run_detection(
-                    aimon_query,
-                    payload['instructions'],
-                    payload['generated_text'],
-                    aimon_context
-                )
-                return result
-            except Exception as e:
-                logger.debug(f"AIMon detect failed on attempt {attempt + 1}: {e}")
-                wait_time = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
-                time.sleep(wait_time)
-        raise RuntimeError("AIMon detect call failed after maximum retries.")
+        @retry(
+            exception_to_check=Exception,
+            tries=max_attempts,
+            delay=1,
+            backoff=2,
+            logger=logger
+        )
+        def inner_detection():
+            logger.debug(f"AIMon detect call with payload: {payload}")
+            _, _, _, _, result = run_detection(
+                aimon_query,
+                payload['instructions'],
+                payload['generated_text'],
+                aimon_context
+            )
+            return result
+        return inner_detection()
 
     def get_response_feedback(self, result):
         """
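
For illustration, the decorated-inner-function pattern adopted in both refactored methods behaves like the following standalone sketch. `flaky_llm`, the call counter, and the failure counts are hypothetical, and `retry` is assumed to behave like the sketch shown after the import hunk above.

calls = {"n": 0}

def flaky_llm(prompt_template, system_prompt, context, user_query):
    # Fails twice, then succeeds, to exercise the backoff path.
    calls["n"] += 1
    if calls["n"] < 3:
        raise ConnectionError("transient failure")
    return "final answer"

@retry(exception_to_check=Exception, tries=5, delay=1, backoff=2, logger=None)
def backoff_call():
    result = flaky_llm("template", None, None, "query")
    if not isinstance(result, str):
        raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
    return result

print(backoff_call())  # returns "final answer" on the third attempt

If every attempt raised, the last exception would propagate out of backoff_call(), matching the "re-raises the last encountered error" wording in the updated docstrings.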