 from aimon.reprompting_api.config import RepromptingConfig, StopReasons
 from aimon.reprompting_api.telemetry import TelemetryLogger
 from aimon.reprompting_api.reprompter import Reprompter
-from aimon.reprompting_api.utils import toxicity_check, get_failed_instructions_count, get_failed_instructions, get_residual_error_score, get_failed_toxicity_instructions
+from aimon.reprompting_api.utils import retry, toxicity_check, get_failed_instructions_count, get_failed_instructions, get_residual_error_score, get_failed_toxicity_instructions
 from aimon import Detect
 import time
 import random
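
Note: the `retry` helper imported here is not shown in this diff. Judging only from the keyword arguments used at the call sites below (`exception_to_check`, `tries`, `delay`, `backoff`, `logger`), it is presumably a conventional exponential-backoff decorator roughly along these lines; this is a sketch of the assumed behavior, not the actual `aimon.reprompting_api.utils` implementation.

import functools
import random
import time

def retry(exception_to_check=Exception, tries=3, delay=1, backoff=2, logger=None):
    """Assumed behavior: retry the wrapped callable with exponential backoff,
    re-raising the last exception once all attempts are exhausted."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            for attempt in range(1, tries + 1):
                try:
                    return func(*args, **kwargs)
                except exception_to_check as e:
                    if logger:
                        logger.warning(f"Attempt {attempt}/{tries} failed: {e}")
                    if attempt == tries:
                        raise  # attempts exhausted: re-raise the last error
                    time.sleep(wait + random.uniform(0, 0.1))  # backoff with small jitter
                    wait *= backoff
        return wrapper
    return decorator
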
@@ -243,57 +243,50 @@ def _build_aimon_payload(self, context, user_query, user_instructions, generated
         }
         return payload
 
-    def _call_llm(self, prompt_template: Template, max_attempts, system_prompt=None, context=None, user_query=None, base_delay=1):
+    def _call_llm(self, prompt_template: Template, max_attempts, system_prompt=None, context=None, user_query=None):
         """
         Calls the LLM with exponential backoff. Retries if the LLM call fails
-        OR returns a non-string value. Raises an exception if all retries fail.
+        OR returns a non-string value. If all retries fail, the last encountered
+        exception from the LLM call is re-raised.
 
         Args:
             prompt_template (Template): Prompt template for the LLM.
             max_attempts (int): Max retry attempts.
-            base_delay (float): Initial delay in seconds before backoff.
-
+
         Returns:
             str: LLM response text.
 
         Raises:
-            RuntimeError: If the LLM call fails or returns an invalid type after all retries.
+            RuntimeError: If the LLM call repeatedly fails, re-raises the last encountered error.
+            TypeError: If the LLM call fails to return a string.
         """
-        last_exception = None
-        for attempt in range(max_attempts):
-            try:
-                logger.debug(f"LLM call attempt {attempt + 1} with prompt template.")
-                result = self.llm_fn(prompt_template, system_prompt, context, user_query)
-                # Validate type
-                if not isinstance(result, str):
-                    raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
-                return result
-            except Exception as e:
-                last_exception = e
-                logger.warning(f"LLM call failed on attempt {attempt + 1}: {e}")
-                wait_time = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
-                time.sleep(wait_time)
-        raise RuntimeError(f"LLM call failed or returned invalid type after maximum retries. Last error: {last_exception}")
+        @retry(exception_to_check=Exception, tries=max_attempts, delay=1, backoff=2, logger=logger)
+        def backoff_call():
+            result = self.llm_fn(prompt_template, system_prompt, context, user_query)
+            if not isinstance(result, str):
+                raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
+            return result
+        return backoff_call()
 
-    def _detect_aimon_response(self, payload, max_attempts, base_delay=1):
+    def _detect_aimon_response(self, payload, max_attempts):
         """
         Calls AIMon Detect with exponential backoff and returns the detection result.
 
         This method wraps the AIMon evaluation call, retrying if it fails due to transient
         errors (e.g., network issues or temporary service unavailability). It retries up to
-        `max_attempts` times with exponential backoff before raising a RuntimeError.
+        `max_attempts` times with exponential backoff before raising the last encountered
+        exception from the AIMon Detect call.
 
         Args:
             payload (dict): A dictionary containing 'context', 'user_query',
                 'instructions', and 'generated_text' for evaluation.
             max_attempts (int): Maximum number of retry attempts.
-            base_delay (float): Initial delay in seconds before exponential backoff.
 
         Returns:
             object: The AIMon detection result containing evaluation scores and feedback.
 
         Raises:
-            RuntimeError: If AIMon Detect fails after all retry attempts.
+            RuntimeError: If AIMon Detect fails after all retry attempts, re-raises the last encountered error.
         """
         aimon_context = f"{payload['context']}\n\nUser Query:\n{payload['user_query']}"
         aimon_query = f"{payload['user_query']}\n\nInstructions:\n{payload['instructions']}"
@@ -302,21 +295,23 @@ def _detect_aimon_response(self, payload, max_attempts, base_delay=1):
         def run_detection(query, instructions, generated_text, context):
             return query, instructions, generated_text, context
 
-        for attempt in range(max_attempts):
-            try:
-                logger.debug(f"AIMon detect attempt {attempt + 1} with payload: {payload}")
-                _, _, _, _, result = run_detection(
-                    aimon_query,
-                    payload['instructions'],
-                    payload['generated_text'],
-                    aimon_context
-                )
-                return result
-            except Exception as e:
-                logger.debug(f"AIMon detect failed on attempt {attempt + 1}: {e}")
-                wait_time = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
-                time.sleep(wait_time)
-        raise RuntimeError("AIMon detect call failed after maximum retries.")
+        @retry(
+            exception_to_check=Exception,
+            tries=max_attempts,
+            delay=1,
+            backoff=2,
+            logger=logger
+        )
+        def inner_detection():
+            logger.debug(f"AIMon detect call with payload: {payload}")
+            _, _, _, _, result = run_detection(
+                aimon_query,
+                payload['instructions'],
+                payload['generated_text'],
+                aimon_context
+            )
+            return result
+        return inner_detection()
 
     def get_response_feedback(self, result):
         """
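
For illustration, the decorated-inner-function pattern adopted in both refactored methods behaves like the following standalone sketch. `flaky_llm`, the call counter, and the failure counts are hypothetical, and `retry` is assumed to behave like the sketch shown after the import hunk above.

calls = {"n": 0}

def flaky_llm(prompt_template, system_prompt, context, user_query):
    # Fails twice, then succeeds, to exercise the backoff path.
    calls["n"] += 1
    if calls["n"] < 3:
        raise ConnectionError("transient failure")
    return "final answer"

@retry(exception_to_check=Exception, tries=5, delay=1, backoff=2, logger=None)
def backoff_call():
    result = flaky_llm("template", None, None, "query")
    if not isinstance(result, str):
        raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
    return result

print(backoff_call())  # returns "final answer" on the third attempt

If every attempt raised, the last exception would propagate out of backoff_call(), matching the "re-raises the last encountered error" wording in the updated docstrings.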