diff --git a/aimon/reprompting_api/__init__.py b/aimon/reprompting_api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aimon/reprompting_api/config.py b/aimon/reprompting_api/config.py
new file mode 100644
index 0000000..ba2bae8
--- /dev/null
+++ b/aimon/reprompting_api/config.py
@@ -0,0 +1,59 @@
+import os
+from typing import Optional
+from dataclasses import dataclass
+import random
+import string
+
+def generate_random_string(length: int) -> str:
+    """Generates a random string of letters and digits."""
+    if not isinstance(length, int) or length <= 0:
+        raise ValueError("Length must be a positive integer.")
+    characters = string.ascii_letters + string.digits
+    return ''.join(random.choice(characters) for _ in range(length))
+
+class StopReasons:
+    ALL_INSTRUCTIONS_ADHERED = "all_instructions_adhered"
+    MAX_ITERATIONS_REACHED = "max_iterations_reached"
+    CONTINUE = "instructions_failed_continue_reprompting"
+    CONTINUE_TOXICITY = "toxicity_detect_continue_reprompting"
+
+    ## limits
+    LATENCY_LIMIT_EXCEEDED = "latency_limit_exceeded"
+
+    ## errors
+    REPROMPTING_FAILED = "reprompting_failed"
+    UNKNOWN_ERROR = "unknown_error"
+
+@dataclass
+class RepromptingConfig:
+    """
+    Configuration for the automated re-prompting pipeline.
+
+    Attributes:
+        publish (bool): Whether to publish results to app.aimon.ai.
+        max_iterations (int): Maximum number of re-prompting iterations (1 initial + N retries).
+        aimon_api_key (Optional[str]): API key for AIMon integration. Defaults to the "AIMON_API_KEY" env var.
+        model_name (Optional[str]): Model identifier for telemetry. Defaults to "aimon-react-model-{rand}".
+        application_name (Optional[str]): Application identifier for telemetry. Defaults to "aimon-react-application-{rand}".
+        return_telemetry (bool): Whether to include per-iteration telemetry in the response.
+        return_aimon_summary (bool): Whether to include a human-readable caption summarizing the run (e.g., "2 iterations, 0 failed instructions remaining").
+        latency_limit_ms (Optional[int]): Maximum cumulative latency (ms) before aborting. None = no limit.
+        user_model_max_retries (Optional[int]): Max retries for user model calls. Defaults to 2.
+        feedback_model_max_retries (Optional[int]): Max retries for feedback model calls. Defaults to 2.
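+
+    Example (a minimal sketch; assumes the AIMON_API_KEY environment variable is set):
+        config = RepromptingConfig(
+            max_iterations=3,
+            return_telemetry=True,
+            latency_limit_ms=5000,
+        )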
+ """ + publish: bool = False + max_iterations: int = 2 + if max_iterations < 1: + raise ValueError("Max iterations must be greater than 0") + aimon_api_key: Optional[str] = os.getenv("AIMON_API_KEY") or "default_api_key" + if aimon_api_key == "default_api_key": + raise ValueError("AIMON_API_KEY environment variable is not set and no fallback value is provided.") + model_name: Optional[str] = "aimon-react-model-" + generate_random_string(5) + application_name: Optional[str] = "aimon-react-application-" + generate_random_string(5) + return_telemetry: bool = False + return_aimon_summary: bool = False + latency_limit_ms: Optional[int] = None + user_model_max_retries: Optional[int] = 2 + feedback_model_max_retries: Optional[int] = 2 + + \ No newline at end of file diff --git a/aimon/reprompting_api/pipeline.py b/aimon/reprompting_api/pipeline.py new file mode 100644 index 0000000..4e962f2 --- /dev/null +++ b/aimon/reprompting_api/pipeline.py @@ -0,0 +1,543 @@ +from aimon.reprompting_api.config import RepromptingConfig, StopReasons +from aimon.reprompting_api.telemetry import TelemetryLogger +from aimon.reprompting_api.reprompter import Reprompter +from aimon.reprompting_api.utils import retry, toxicity_check, get_failed_instructions_count, get_failed_instructions, get_residual_error_score, get_failed_toxicity_instructions +from aimon import Detect +import time +import random +from string import Template +import logging + +logger = logging.getLogger(__name__) + +class RepromptingPipeline: + """ + A pipeline for iterative re-prompting of LLM responses using AIMon evaluation. + + This pipeline orchestrates: + - Initial prompt generation for a given query, context, and user instructions. + - Interaction with a black-box LLM to generate responses. + - Evaluation of responses using AIMon detectors (instruction adherence, groundedness, toxicity). + - Iterative corrective re-prompting until stopping conditions are met. + - Collection and emission of telemetry for all iterations. + + **Expected LLM function signature**: + llm_fn(recommended_prompt_template: Template, system_prompt: str, context: str, user_query: str) -> str + + Attributes: + llm_fn (callable): Function to call the LLM. Must be a Callable with + -recommended_prompt_template: Template + -system_prompt: str + -context: str + -user_query: str + + config (RepromptingConfig): Configuration object with API keys and iteration limits. + reprompter (Reprompter): Utility for generating corrective prompts based on evaluation feedback. + telemetry (TelemetryLogger): Logger for capturing telemetry data. + detect (Detect): AIMon detection client for evaluating model responses. + + Returns: + dict: + { + "best_response" (str): Best model response across all iterations. + "telemetry" (list, optional): Iteration-level telemetry if enabled. + "summary" (str, optional): Human-readable run summary if enabled. + } + """ + def __init__(self, llm_fn, config): + """ + Initialize pipeline with LLM callable and RepromptingConfig. + + Args: + llm_fn (callable): Function to call the LLM. + Signature: llm_fn(recommended_prompt_template: Template, system_prompt: str, context: str, user_query: str) -> str + config (RepromptingConfig): Configuration object with API keys and limits. 
+ + """ + self.llm_fn = llm_fn + self.config = config or RepromptingConfig() + + # Utilities for reprompting, telemetry, and scoring + self.reprompter = Reprompter() + self.telemetry = TelemetryLogger() + + # Initialize AIMon Detect for response evaluation + self.detect = Detect( + values_returned=['user_query', 'instructions', 'generated_text', 'context'], + config={ + "instruction_adherence": { + "detector_name": "default", + "explain": True, + "extract_from_system": False + }, + "groundedness": { + "detector_name": "default", + "explain": True + }, + "toxicity": { + "detector_name": "default", + "explain": True + } + }, + api_key=self.config.aimon_api_key, + application_name = self.config.application_name, + model_name = self.config.model_name, + publish=self.config.publish + ) + + def run(self, system_prompt: str, context: str, user_query: str, user_instructions): + """ + Execute the full re-prompting pipeline. + + Process: + 1. Build an initial prompt with query, context, and instructions. + 2. Call the LLM to generate a response. + 3. Evaluate the response with AIMon detectors for instruction adherence, toxicity, and groundedness. + Toxicity and groundedness are always evaluated. If user_instructions are empty / not provided, the + instruction adherence detector is not used. + 4. If violations are found, iteratively generate corrective prompts and re-prompt the LLM. + 5. Stop when all instructions are followed and response has no hallucination or toxicity or when iteration or latency limits are reached. + 6. Return the best response (lowest residual error) along with telemetry and a summary if configured. + + Args: + user_query (str): Must be a non-empty string. The user's query or instruction. + context (str): Contextual information to include in the prompt. May be an empty string, but it is recommended to be included. + user_instructions (list[str]): Instructions the model must follow. May be an empty list, but it is highly recommended to be included. + system_prompt (str): A high‑level role or behavior definition for the model. May be an empty string. + + Returns: + dict: + { + "best_response" (str): Best model response from all iterations. + "telemetry" (list, optional): Telemetry for all iterations if enabled. + "summary" (str, optional): Summary of the process if enabled. 
+                }
+        """
+        logger.info("Starting RepromptingPipeline run")
+        logger.debug(f"Inputs - System Prompt: {system_prompt}, Context: {context}, User Query: {user_query}, Instructions: {user_instructions}")
+        iteration_outputs = {}  # key: iteration number → dict(response_text, residual_error_score, failed_instructions_count)
+        pipeline_start = time.time()
+        iteration_num = 1
+
+        curr_prompt = self._build_original_prompt()
+        logger.debug(f"Initial prompt template built: {curr_prompt.template}")
+
+        # First LLM call
+        curr_generated_text = self._call_llm(curr_prompt, self.config.user_model_max_retries, system_prompt, context, user_query)
+        logger.debug(f"Initial LLM response: {curr_generated_text}")
+
+        # Evaluate response with AIMon
+        curr_payload = self._build_aimon_payload(context, user_query, user_instructions, curr_generated_text, system_prompt)
+        curr_result = self._detect_aimon_response(curr_payload, self.config.feedback_model_max_retries)
+        logger.debug(f"AIMon evaluation result: {curr_result}")
+
+        # Get scores and detailed feedback on failed instructions
+        scores, feedback = self.get_response_feedback(curr_result)
+        self._record_iteration_output(iteration_outputs, iteration_num, curr_generated_text, curr_result)
+
+        # Iteratively re-prompt until conditions are met or limits reached
+        stop_reason = None
+        while True:
+            should_stop, stop_reason = self._should_stop_reprompting(curr_result, iteration_num, pipeline_start)
+            logger.info(f"Iteration {iteration_num}: Stop decision: {should_stop}, Reason: {stop_reason}")
+            if should_stop:
+                break
+
+            # Emit telemetry for this iteration
+            self._emit_iteration_telemetry(
+                iteration_num,
+                pipeline_start,
+                scores,
+                feedback,
+                curr_result,
+                stop_reason or StopReasons.CONTINUE,
+                curr_prompt,
+                curr_generated_text,
+            )
+
+            # Generate corrective prompt
+            curr_prompt = self._build_corrective_prompt(curr_payload, curr_result)
+
+            # Retry LLM call with corrective prompt
+            curr_generated_text = self._call_llm(curr_prompt, self.config.user_model_max_retries, system_prompt, context, user_query)
+            # Re-evaluate the new response
+            curr_payload = self._build_aimon_payload(context, user_query, user_instructions, curr_generated_text, system_prompt)
+            curr_result = self._detect_aimon_response(curr_payload, self.config.feedback_model_max_retries)
+
+            # Extract updated scores and feedback
+            scores, feedback = self.get_response_feedback(curr_result)
+            iteration_num += 1
+            self._record_iteration_output(iteration_outputs, iteration_num, curr_generated_text, curr_result)
+
+        # Final telemetry after loop exit
+        self._emit_iteration_telemetry(
+            iteration_num,
+            pipeline_start,
+            scores,
+            feedback,
+            curr_result,
+            stop_reason or StopReasons.UNKNOWN_ERROR,
+            curr_prompt,
+            curr_generated_text,
+        )
+
+        # Select best response across all iterations
+        best_output, best_failed_count = self._select_best_iteration(iteration_outputs)
+
+        # Build final response payload
+        response = {"best_response": best_output}
+        if self.config.return_telemetry:
+            response["telemetry"] = self.telemetry.get_all()
+        if self.config.return_aimon_summary:
+            response["summary"] = self._gen_summary(iteration_num, best_failed_count)
+
+        logger.info("RepromptingPipeline run completed")
+        logger.info(f"Best response selected with {best_failed_count} failed instructions remaining.")
+
+        return response
+
+    def _build_original_prompt(self) -> Template:
+        """
+        Build a reusable template for combining system_prompt, context, and user_query.
+        This returns a string.Template object so the caller can safely substitute values.
+
+        Placeholders:
+            - system_prompt
+            - context
+            - user_query
+
+        Returns:
+            Template: A string.Template for building the base LLM prompt.
+        """
+        template_str = (
+            "System:\n${system_prompt}\n\n"
+            "Context:\n${context}\n\n"
+            "User Query:\n${user_query}"
+        )
+        return Template(template_str)
+
+    def _build_aimon_payload(self, context, user_query, user_instructions, generated_text, system_prompt):
+        """
+        Constructs the AIMon input payload.
+
+        Args:
+            context (str): Context for the LLM.
+            user_query (str): The user's query.
+            user_instructions (list[str]): Instructions for the model.
+            generated_text (str): The model's generated response.
+            system_prompt (str): The system prompt used for generation.
+
+        Returns:
+            dict: Payload for AIMon evaluation.
+        """
+        if not isinstance(user_instructions, list):
+            user_instructions = []
+        payload = {
+            'context': context,
+            'user_query': user_query,
+            'generated_text': generated_text,
+            'instructions': user_instructions,
+            'system_prompt': system_prompt
+        }
+        return payload
+
+    def _call_llm(self, prompt_template: Template, max_attempts, system_prompt=None, context=None, user_query=None):
+        """
+        Calls the LLM with exponential backoff. Retries if the LLM call fails
+        OR returns a non-string value. If all retries fail, the last encountered
+        exception from the LLM call is re-raised.
+
+        Args:
+            prompt_template (Template): Prompt template for the LLM.
+            max_attempts (int): Max retry attempts.
+            system_prompt (str, optional): System prompt forwarded to llm_fn.
+            context (str, optional): Context forwarded to llm_fn.
+            user_query (str, optional): User query forwarded to llm_fn.
+
+        Returns:
+            str: LLM response text.
+
+        Raises:
+            Exception: Re-raises the last encountered error if all attempts fail.
+            TypeError: If the LLM call fails to return a string.
+        """
+        @retry(exception_to_check=Exception, tries=max_attempts, delay=1, backoff=2, logger=logger)
+        def backoff_call():
+            result = self.llm_fn(prompt_template, system_prompt, context, user_query)
+            if not isinstance(result, str):
+                raise TypeError(f"LLM returned invalid type {type(result).__name__}, expected str.")
+            return result
+        return backoff_call()
+
+    def _detect_aimon_response(self, payload, max_attempts):
+        """
+        Calls AIMon Detect with exponential backoff and returns the detection result.
+
+        This method wraps the AIMon evaluation call, retrying if it fails due to transient
+        errors (e.g., network issues or temporary service unavailability). It retries up to
+        `max_attempts` times with exponential backoff before raising the last encountered
+        exception from the AIMon Detect call.
+
+        Args:
+            payload (dict): A dictionary containing 'context', 'user_query',
+                'instructions', 'generated_text', and 'system_prompt' for evaluation.
+            max_attempts (int): Maximum number of retry attempts.
+
+        Returns:
+            object: The AIMon detection result containing evaluation scores and feedback.
+
+        Raises:
+            Exception: If AIMon Detect fails after all retry attempts, the last encountered error is re-raised.
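+
+        Example payload (illustrative values only):
+            {
+                'context': "Renewals require Form BLC-RENEW-2025...",
+                'user_query': "What are the renewal steps?",
+                'instructions': ["Answer in plain English."],
+                'generated_text': "To renew, submit Form BLC-RENEW-2025...",
+                'system_prompt': "You are a helpful assistant."
+            }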
+ """ + aimon_context = f"{payload['context']}\n\nUser Query:\n{payload['user_query']}" + aimon_query = f"{payload['user_query']}\n\nInstructions:\n{payload['instructions']}" + + @self.detect + def run_detection(query, instructions, generated_text, context): + return query, instructions, generated_text, context + + @retry( + exception_to_check=Exception, + tries=max_attempts, + delay=1, + backoff=2, + logger=logger + ) + def inner_detection(): + logger.debug(f"AIMon detect call with payload: {payload}") + _, _, _, _, result = run_detection( + aimon_query, + payload['instructions'], + payload['generated_text'], + aimon_context + ) + return result + return inner_detection() + + def get_response_feedback(self, result): + """ + Extract groundedness and instruction adherence scores and failed instructions. + + Args: + result (object): AIMon detection result. + + Returns: + tuple: (scores (dict), failed_instructions (list)) + """ + scores = { + "groundedness": result.detect_response.groundedness.get("score", 0.0), + "instruction_adherence": result.detect_response.instruction_adherence.get("score", 0.0), + "toxicity": result.detect_response.toxicity.get("score", 0.0) + } + feedback = get_failed_instructions(result) + get_failed_toxicity_instructions(result) + return scores, feedback + + def _build_corrective_prompt(self, payload, result): + """ + Generate a corrective prompt using AIMon evaluation results. + + Args: + payload (dict): AIMon input payload. + result (object): AIMon detection result. + + Returns: + str: A corrective prompt for re-prompting the LLM. + """ + return self.reprompter.create_corrective_prompt(result, payload) + + def _should_stop_reprompting(self, result, iteration_num, pipeline_start): + """ + Determine whether to stop re-prompting. + + Stopping conditions: + - Max iterations reached. + - Latency budget 75% depleted + - All instructions are adhered to. + - Otherwise, continue if violations or toxicity remain. + + Args: + result (object): AIMon detection result. + iteration_num (int): Current iteration number. + + Returns: + tuple: + (should_stop (bool), stop_reason (str or None)) + """ + # Max iterations reached + if iteration_num >= self.config.max_iterations: + return True, StopReasons.MAX_ITERATIONS_REACHED + + latency_limit_ms = self.config.latency_limit_ms + if latency_limit_ms is not None: + cumulative_latency = self._get_cumulative_latency(pipeline_start) + if cumulative_latency > ((0.75) * latency_limit_ms): + return True, StopReasons.LATENCY_LIMIT_EXCEEDED + + # Continue if toxicity is detected + if toxicity_check(result): + return False, StopReasons.CONTINUE_TOXICITY + + # Continue if there are still failed instructions + if get_failed_instructions_count(result) > 0: + return False, StopReasons.CONTINUE + + # All instructions followed + return True, StopReasons.ALL_INSTRUCTIONS_ADHERED + + def _select_best_iteration(self, iteration_outputs): + """ + Selects the best iteration based on the lowest residual error score. + + Args: + iteration_outputs (dict): Mapping of iteration_num -> iteration data. 
+
+        Returns:
+            tuple: (best_output (str), best_failed_count (int))
+        """
+        valid_iterations = [
+            entry for entry in iteration_outputs.values()
+            if isinstance(entry.get("residual_error_score"), (int, float))
+        ]
+        if not valid_iterations:
+            return "[ERROR: No valid response]", None
+
+        best_iteration = min(valid_iterations, key=lambda x: x["residual_error_score"])
+        return best_iteration["response_text"], best_iteration["failed_instructions_count"]
+
+    def _gen_summary(self, iteration_num, best_failed_count):
+        """
+        Generate a human-readable summary for the pipeline run,
+        e.g. "2 iterations, 0 failed instructions remaining".
+
+        Args:
+            iteration_num (int): Number of iterations performed.
+            best_failed_count (int): Number of failed instructions in the best response.
+
+        Returns:
+            str: Summary.
+        """
+        iteration_word = "iteration" if iteration_num == 1 else "iterations"
+        instruction_word = "failed instruction" if best_failed_count == 1 else "failed instructions"
+        summary = f"{iteration_num} {iteration_word}, {best_failed_count} {instruction_word} remaining"
+        return summary
+
+    def _build_telemetry_entry(
+        self,
+        iteration,
+        cumulative_latency,
+        scores,
+        feedback,
+        residual_error,
+        failed_count,
+        stop_reason,
+        prompt,
+        response_text,
+    ):
+        """
+        Build a structured telemetry entry for an iteration.
+
+        Args:
+            iteration (int): Iteration number.
+            cumulative_latency (float): Total latency in milliseconds so far.
+            scores (dict): Evaluation scores.
+            feedback (list): Failed instruction feedback.
+            residual_error (float): Residual error score.
+            failed_count (int): Number of failed instructions.
+            stop_reason (str): Reason for stopping.
+            prompt (str): Prompt used for this iteration.
+            response_text (str): Model's response.
+
+        Returns:
+            dict: Structured telemetry entry.
+        """
+        return {
+            "iteration": iteration,
+            "cumulative_latency_ms": cumulative_latency,
+            "scores": scores,
+            "response_feedback": feedback,
+            "residual_error": residual_error,
+            "failed_instructions_count": failed_count,
+            "stop_reason": stop_reason,
+            "prompt": prompt,
+            "response_text": response_text,
+        }
+
+    def _emit_iteration_telemetry(
+        self,
+        iteration_num,
+        pipeline_start,
+        scores,
+        feedback,
+        curr_result,
+        stop_reason,
+        curr_prompt,
+        curr_generated_text,
+    ):
+        """
+        Build and emit telemetry for an iteration. Calculates cumulative latency.
+
+        Args:
+            iteration_num (int): Current iteration number.
+            pipeline_start (float): Start time of the pipeline (epoch).
+            scores (dict): Evaluation scores.
+            feedback (list): Failed instruction feedback.
+            curr_result (object): AIMon detection result.
+            stop_reason (str): Reason for stopping or continuing.
+            curr_prompt (Template): Prompt template used.
+            curr_generated_text (str): Model response text.
+
+        Returns:
+            dict: The telemetry entry.
+        """
+        cumulative_latency_ms = self._get_cumulative_latency(pipeline_start)
+
+        residual_error = get_residual_error_score(curr_result) if curr_result else None
+        failed_count = get_failed_instructions_count(curr_result) if curr_result else None
+
+        prompt_text = curr_prompt.template
+
+        entry = self._build_telemetry_entry(
+            iteration_num,
+            cumulative_latency_ms,
+            scores,
+            feedback,
+            residual_error,
+            failed_count,
+            stop_reason,
+            prompt_text,
+            curr_generated_text,
+        )
+        try:
+            self.telemetry.emit(**entry)
+        except Exception as e:
+            logger.warning(f"[Warning] Telemetry emission failed: {e}")
+        return entry
+
+    def _get_cumulative_latency(self, pipeline_start):
+        """
+        Calculate cumulative latency since pipeline start.
+
+        Args:
+            pipeline_start (float): Start time of the pipeline (epoch).
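+
+        Example: a pipeline started 2.5 seconds ago yields roughly 2500.0.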
+
+        Returns:
+            float: Cumulative latency in milliseconds.
+        """
+        return (time.time() - pipeline_start) * 1000
+
+    def _record_iteration_output(self, iteration_outputs, iteration_num, generated_text, result):
+        """
+        Record iteration outputs for later selection of the best response.
+
+        Args:
+            iteration_outputs (dict): Stores outputs per iteration.
+            iteration_num (int): Current iteration number.
+            generated_text (str): Model's generated response.
+            result (object): AIMon detection result.
+
+        Returns:
+            None
+        """
+        iteration_outputs[iteration_num] = {
+            "response_text": generated_text,
+            "residual_error_score": get_residual_error_score(result),
+            "failed_instructions_count": get_failed_instructions_count(result)
+        }
\ No newline at end of file
diff --git a/aimon/reprompting_api/reprompter.py b/aimon/reprompting_api/reprompter.py
new file mode 100644
index 0000000..e229621
--- /dev/null
+++ b/aimon/reprompting_api/reprompter.py
@@ -0,0 +1,218 @@
+from aimon.reprompting_api.utils import get_failed_instructions_count, get_failed_instructions, get_failed_toxicity_instructions
+from string import Template
+import logging
+
+logger = logging.getLogger(__name__)
+
+class Reprompter:
+    """
+    Generates corrective re-prompting templates for improving LLM responses
+    based on AIMon evaluation results. This class combines failed-instruction
+    feedback and background information to drive iterative improvement prompts
+    for stateless LLMs. The template accepts substitutions for system_prompt,
+    user_query, and context.
+
+    Designed for use in open-source contexts where developers may want to
+    customize the prompt structure or language.
+    """
+
+    def create_corrective_prompt(self, result, aimon_payload: dict) -> Template:
+        """
+        Build a corrective prompt **template** for the next LLM response.
+
+        Placeholders:
+            ${system_prompt} – The original system prompt
+            ${user_query} – The user query
+            ${context} – The context string
+
+        Args:
+            result: AIMon detection result object.
+            aimon_payload (dict): Original payload containing:
+                - 'system_prompt' (str)
+                - 'user_query' (str)
+                - 'context' (str)
+                - 'generated_text' (str)
+                - 'instructions' (list[str])
+
+        Returns:
+            Template: A string.Template object (with placeholders for substitution).
+        """
+        try:
+            failed_instructions = get_failed_instructions(result)
+            failed_count = get_failed_instructions_count(result)
+            logger.debug(f"Failed instructions ({failed_count}): {failed_instructions}")
+
+            tone = self.determine_tone(failed_count)
+            toxicity_feedback = self.get_toxicity_reprompt(result)
+            failed_instructions_reprompt = self.format_failed_instructions(failed_instructions, toxicity_feedback)
+            passed_instructions = self.format_passed_instructions(self.get_passed_instructions(result, aimon_payload))
+            generated_text = aimon_payload.get('generated_text', '')
+
+            # Build template string (placeholders for substitution)
+            template_str = (
+                "Original system prompt:\n"
+                "${system_prompt}\n\n"
+                "Revise your previous response to this query:\n"
+                "${user_query}\n\n"
+                "Context:\n"
+                "${context}\n\n"
+                "Previous response:\n"
+                f"{generated_text}\n\n"
+                f"{tone}\n\n"
+                f"{failed_instructions_reprompt}\n\n"
+                "Preserve correct content. Return only the revised output with no extra explanation.\n"
+                f"{passed_instructions}\n"
+            )
+            logger.debug(f"Generated corrective prompt template:\n{template_str}")
+            return Template(template_str)
+        except Exception as e:
+            logger.error(f"Error generating corrective prompt: {e}")
+            raise RuntimeError(
+                f"Corrective prompt template generation failed: {type(e).__name__} — {e}"
+            ) from e
+
+    def get_toxicity_reprompt(self, result) -> str:
+        """
+        Generate feedback for detected toxicity failures in the following format:
+            Your reply contained toxic content. Remove any harmful, abusive, or unsafe language.
+            1. We are X% confident that your response had the following issue:
+            → Violation: "..."
+            → Explanation: "..."
+
+        Args:
+            result: AIMon detection result.
+
+        Returns:
+            str: Toxicity-specific feedback, or an empty string if no toxicity is detected.
+        """
+        try:
+            failed_instructions = get_failed_toxicity_instructions(result)
+            if not failed_instructions:
+                return ""
+            logger.info(f"Toxicity violations detected: {len(failed_instructions)}")
+            lines = ["Your reply contained toxic content. Remove any harmful, abusive, or unsafe language."]
+            for i, failed_instruction in enumerate(failed_instructions, start=1):
+                confidence = failed_instruction.get("score", 0.0) * 100
+                confidence_str = f"{confidence:.2f}%"
+                lines.append(
+                    f"{i}. We are {confidence_str} confident that your response had the following issue:\n"
+                    f"→ Violation: \"{failed_instruction.get('instruction', '[Unknown]')}\"\n"
+                    f"→ Explanation: {failed_instruction.get('explanation', '[No explanation provided]')}\n"
+                )
+            return "\n\n".join(lines)
+        except Exception as e:
+            logger.error(f"Error generating toxicity feedback: {e}")
+            return ""
+
+    def get_reprompt_per_instruction(self, failed_instruction):
+        """
+        Corrective feedback for a single failed instruction in the following format:
+            1. We are X% confident that the following instruction was not followed:
+            → Violated Instruction: "..."
+            → Explanation: "..."
+
+        Args:
+            failed_instruction (dict): Failed instruction data containing:
+                - 'instruction' (str)
+                - 'score' (float)
+                - 'explanation' (str)
+
+        Returns:
+            str: Formatted feedback for the failed instruction.
+        """
+        try:
+            confidence = (1.0 - failed_instruction.get("score", 0.0)) * 100
+            confidence_str = f"{confidence:.2f}%"
+            return (
+                f"We are {confidence_str} confident that the following instruction was not followed:\n"
+                f"→ Violated Instruction: \"{failed_instruction.get('instruction', '[Unknown]')}\"\n"
+                f"→ Explanation: {failed_instruction.get('explanation', '[No explanation provided]')}\n"
+            )
+        except Exception as e:
+            logger.error(f"Error formatting failed instruction: {e}")
+            raise RuntimeError(
+                f"Corrective prompt generation failed: Unexpected error of type {type(e).__name__} — {e}"
+            ) from e
+
+    def format_failed_instructions(self, failed_instructions, toxicity_feedback: str = None):
+        """
+        Combine toxicity feedback with general failed instructions into a formatted block.
+
+        Args:
+            failed_instructions (list): List of failed instruction dictionaries.
+            toxicity_feedback (str, optional): Pre-generated toxicity feedback block.
+
+        Returns:
+            str: Combined formatted feedback string.
+        """
+        lines = []
+        if toxicity_feedback:
+            lines.append(toxicity_feedback)
+        if failed_instructions:
+            lines.append("Fix the following:")
+            for i, error in enumerate(failed_instructions, start=1):
+                lines.append(f"{i}. {self.get_reprompt_per_instruction(error)}")
+        if not lines:
+            return "No major issues."
+ return "\n\n".join(lines) + + def get_passed_instructions(self, result, aimon_payload): + """ + Retrieve instructions that passed all adherence and groundedness checks. + + Args: + result: AIMon detection result. + aimon_payload (dict): Original payload containing the full instruction list. + + Returns: + list: Passed instruction strings. + """ + try: + all_instructions = aimon_payload.get("instructions", []) + failed_instructions = {item["instruction"] for item in get_failed_instructions(result)} + return [instr for instr in all_instructions if instr not in failed_instructions] + except Exception as e: + logger.error(f"Error determining passed instructions: {e}") + return [] + + def format_passed_instructions(self, passed_instructions) -> str: + """ + Format passed instructions to reinforce adherence in the next iteration. + + Args: + passed_instructions (list): Passed instruction strings. + + Returns: + str: Formatted reminder block for passed instructions. + """ + if not passed_instructions: + return "" + return ( + "You did well on these instructions. It is important that you continue to follow these instructions:\n" + + "\n".join(f"- {instr}" for instr in passed_instructions) + ) + + def determine_tone(self, failed_count: int) -> str: + """ + Decide the corrective prompt tone based on failure severity: + + if failed instructions >= 3: + Your reply had major issues. Fix all points below. + if failed instructions between 2 and 3: + Some parts were off. Improve using the notes below. + if less than 2: + Almost there! Just a few small fixes needed. + + Args: + failed_count (int): Total number of failed instructions. + + Returns: + str: Tone-setting string for the corrective prompt. + """ + if failed_count >= 3: + return "Your reply had major issues. Fix all points below." + elif failed_count >= 2: + return "Some parts were off. Improve using the notes below." + else: + return "Almost there. Just a few small fixes needed." + diff --git a/aimon/reprompting_api/runner.py b/aimon/reprompting_api/runner.py new file mode 100644 index 0000000..4bb072f --- /dev/null +++ b/aimon/reprompting_api/runner.py @@ -0,0 +1,70 @@ +""" +runner.py — This module provides a high-level function (`run_reprompting_pipeline`) +for executing AIMon's iterative re-prompting workflow. + +This function is the primary entry point for developers and end-users. It: + - Normalizes inputs (replacing missing `system_prompt` or `context` with clear placeholders). + - Initializes the `RepromptingPipeline` with the provided configuration and LLM function. + - Runs the full re-prompting loop, generating an initial response, evaluating it, + and iteratively re-prompting until adherence criteria or stopping conditions are met. + +Contributors can extend this behavior by modifying `RepromptingPipeline` or `RepromptingConfig`. +""" +from typing import List, Optional +from aimon.reprompting_api.pipeline import RepromptingPipeline +from aimon.reprompting_api.config import RepromptingConfig + +def run_reprompting_pipeline( + llm_fn, + user_query: str, + system_prompt: str = None, + context:str = None, + user_instructions: List[str] = None, + reprompting_config: RepromptingConfig = None, +) -> dict: + """ + High-level wrapper for running the full AIMon re-prompting pipeline. + + This function prepares and normalizes all inputs, initializes the pipeline, + and executes the iterative re-prompting process. 
+
+    Args:
+        llm_fn (Callable[[Template, str, str, str], str]): A function to call the LLM. Must accept a prompt template (recommended_prompt_template),
+            `system_prompt`, `context`, and `user_query`.
+        user_query (str): The user’s query. Must be a non-empty string.
+        system_prompt (str, optional): A system-level instruction string. Defaults to `"[no system prompt provided]"` if None or empty.
+        context (str, optional): Supplemental context for the LLM. Defaults to `"[no context provided]"` if None or empty.
+        user_instructions (List[str], optional): A list of instructions for the model to follow. Defaults to an empty list.
+        reprompting_config (RepromptingConfig, optional): Configuration object for controlling pipeline behavior.
+
+    Returns:
+        dict: A structured dictionary containing:
+            - "best_response" (str): The final, best LLM response.
+            - "telemetry" (list, optional): Iteration-level telemetry if enabled in config.
+            - "summary" (str, optional): A human-readable summary of the process if enabled.
+    """
+
+    # Use the provided config or fall back to defaults
+    config = reprompting_config or RepromptingConfig()
+
+    # Validate llm_fn
+    if not callable(llm_fn):
+        raise TypeError("llm_fn must be a callable that returns a string.")
+
+    if not user_query or not isinstance(user_query, str):
+        raise ValueError("user_query must be a non-empty string.")
+
+    context = context if (context and isinstance(context, str)) else "[no context provided]"
+    system_prompt = system_prompt if (system_prompt and isinstance(system_prompt, str)) else "[no system prompt provided]"
+
+    # Initialize the re-prompting pipeline with the LLM function and configuration
+    pipeline = RepromptingPipeline(llm_fn=llm_fn, config=config)
+
+    return pipeline.run(
+        system_prompt=system_prompt,
+        context=context,
+        user_query=user_query,
+        user_instructions=user_instructions or []  # Default to empty list if none provided
+    )
\ No newline at end of file
diff --git a/aimon/reprompting_api/telemetry.py b/aimon/reprompting_api/telemetry.py
new file mode 100644
index 0000000..1d408c2
--- /dev/null
+++ b/aimon/reprompting_api/telemetry.py
@@ -0,0 +1,75 @@
+import uuid
+from datetime import datetime
+
+class TelemetryLogger:
+    """
+    A lightweight logger for recording telemetry events during re-prompting pipeline execution.
+
+    Telemetry is stored in memory for retrieval and returned by the pipeline when requested.
+    """
+    def __init__(self):
+        """Initialize an in-memory telemetry logger."""
+        self.session_id = str(uuid.uuid4())
+        self.memory_store = []
+
+    def emit(
+        self,
+        iteration: int,
+        cumulative_latency_ms: float,
+        scores: dict,
+        response_feedback: list,
+        residual_error: float,
+        failed_instructions_count: int,
+        stop_reason: str,
+        response_text: str,
+        prompt: str = "",
+    ):
+        """
+        Emit a single telemetry entry.
+
+        Args:
+            iteration (int): The iteration number of the pipeline (starts at 1).
+            cumulative_latency_ms (float): Total latency from pipeline start (ms).
+            scores (dict): Evaluation scores (e.g., groundedness, instruction adherence).
+            response_feedback (list): Feedback for failed instructions.
+            residual_error (float): Residual error score.
+            failed_instructions_count (int): Count of instructions not followed.
+            stop_reason (str): Reason for stopping or continuing.
+            response_text (str): The raw text response from the LLM.
+            prompt (str): The prompt text used for this iteration.
+        """
+        telemetry = {
+            # not returned
+            "_timestamp": datetime.utcnow().isoformat() + "Z",
+            "_session_id": self.session_id,
+            # returned
+            "iteration": iteration,
+            "cumulative_latency_ms": cumulative_latency_ms,
+            "scores": scores,
+            "response_feedback": response_feedback,
+            "residual_error": residual_error,
+            "failed_instructions_count": failed_instructions_count,
+            "stop_reason": stop_reason,
+            "prompt_template": prompt,
+            "response_text": response_text,
+        }
+        self.memory_store.append(telemetry)
+
+    def get_all(self, include_meta=False):
+        """
+        Return all recorded telemetry entries.
+
+        Args:
+            include_meta (bool): If True, includes session_id and timestamp. Defaults to False.
+
+        Returns:
+            list: Telemetry entries, stripped of internal metadata unless requested.
+        """
+        if include_meta:
+            return self.memory_store
+        # Strip out keys starting with "_" for external return
+        sanitized = []
+        for entry in self.memory_store:
+            sanitized.append({k: v for k, v in entry.items() if not k.startswith("_")})
+        return sanitized
diff --git a/aimon/reprompting_api/utils.py b/aimon/reprompting_api/utils.py
new file mode 100644
index 0000000..20740fc
--- /dev/null
+++ b/aimon/reprompting_api/utils.py
@@ -0,0 +1,241 @@
+"""
+utils.py — Utility functions for processing AIMon reprompting detector results.
+
+This module provides helper functions for:
+- Extracting failed instructions across instruction adherence, groundedness, and toxicity detectors.
+- Calculating a residual error score (0–1) for evaluating LLM responses.
+
+These utilities are primarily used by the RepromptingPipeline to:
+- Build telemetry.
+- Select best iterations.
+- Guide corrective re-prompting logic.
+
+Key conventions:
+- Toxicity failures are flagged when follow_probability > TOXICITY_THRESHOLD (default 0.25).
+- Residual error scoring penalizes low follow probabilities more heavily and inverts toxicity follow probabilities so that toxic content raises the error.
+"""
+from typing import Callable, Type, Union, Tuple, Optional, List
+from functools import wraps
+import logging
+import random
+import time
+
+def retry(
+    exception_to_check: Union[Type[BaseException], Tuple[Type[BaseException], ...]],
+    tries: int = 5,
+    delay: int = 3,
+    backoff: int = 2,
+    logger: Optional[logging.Logger] = None,
+    log_level: int = logging.WARNING,
+    re_raise: bool = True,
+    jitter: float = 0.1
+) -> Callable:
+    """
+    Retry calling the decorated function using an exponential backoff.
+
+    :param exception_to_check: Exception or a tuple of exceptions to check.
+    :param tries: Number of times to try (not retry) before giving up.
+    :param delay: Initial delay between retries in seconds.
+    :param backoff: Backoff multiplier, e.g., a value of 2 will double the delay each retry.
+    :param logger: Logger to use. If None, print.
+    :param log_level: Logging level.
+    :param re_raise: Whether to re-raise the exception after the last retry.
+    :param jitter: The maximum jitter to apply to the delay as a fraction of the delay.
+    """
+
+    def deco_retry(func: Callable) -> Callable:
+        @wraps(func)
+        def f_retry(*args, **kwargs):
+            remaining_tries, current_delay = tries, delay
+            while remaining_tries > 1:
+                try:
+                    return func(*args, **kwargs)
+                except exception_to_check as e:
+                    msg = f"{e}, Retrying in {current_delay} seconds..."
+                    if logger:
+                        logger.log(log_level, msg)
+                    else:
+                        print(msg)
+                    time.sleep(current_delay * (1 + jitter * (2 * random.random() - 1)))
+                    remaining_tries -= 1
+                    current_delay *= backoff
+            try:
+                return func(*args, **kwargs)
+            except exception_to_check as e:
+                msg = f"Failed after {tries} tries. {e}"
+                if logger:
+                    logger.log(log_level, msg)
+                else:
+                    print(msg)
+                if re_raise:
+                    raise
+        return f_retry
+    return deco_retry
+
+# Toxicity threshold for AIMon detection; follow probabilities above this are considered failures
+TOXICITY_THRESHOLD = 0.25
+
+def _count_toxicity_failures(result) -> int:
+    """
+    Count the number of toxicity instructions whose follow probability exceeds the threshold.
+
+    Args:
+        result: AIMon detection result containing a `toxicity` section.
+
+    Returns:
+        int: Number of failed toxicity instructions.
+    """
+    return sum(
+        1
+        for inst in result.detect_response.toxicity.get("instructions_list", [])
+        if inst.get("follow_probability", 0.0) > TOXICITY_THRESHOLD
+    )
+
+def toxicity_check(result) -> bool:
+    """
+    Check whether any toxicity instructions exceed the threshold.
+
+    Args:
+        result: AIMon detection result containing a `toxicity` section.
+
+    Returns:
+        bool: True if at least one toxicity instruction exceeds the threshold, False otherwise.
+    """
+    return _count_toxicity_failures(result) > 0
+
+
+def get_failed_toxicity_instructions(result) -> List[dict]:
+    """
+    Extract failed toxicity instructions exceeding the threshold.
+
+    Args:
+        result: AIMon detection result containing a `toxicity` section.
+
+    Returns:
+        List[dict]: A list of dictionaries, each describing a failed toxicity instruction with:
+            - type (str): "toxicity_failure"
+            - source (str): "toxicity"
+            - instruction (str): The instruction text.
+            - score (float): The follow probability.
+            - explanation (str): The explanation for the failure.
+    """
+    failed = []
+    for inst in result.detect_response.toxicity.get("instructions_list", []):
+        if inst.get("follow_probability", 0.0) > TOXICITY_THRESHOLD:
+            failed.append({
+                "type": "toxicity_failure",
+                "source": "toxicity",
+                "instruction": inst.get("instruction", ""),
+                "score": inst.get("follow_probability", 0.0),
+                "explanation": inst.get("explanation", "")
+            })
+    return failed
+
+def get_failed_instructions(result) -> List[dict]:
+    """
+    Extract all failed instructions from the adherence and groundedness detectors.
+    (Toxicity failures are extracted separately via get_failed_toxicity_instructions.)
+
+    Args:
+        result: AIMon detection result containing `instruction_adherence` and `groundedness` sections.
+
+    Returns:
+        List[dict]: A list of failed instructions with:
+            - type (str): Failure type ("instruction_adherence_failure", "groundedness_failure").
+            - source (str): Detector source ("instruction_adherence", "groundedness").
+            - instruction (str): The instruction text.
+            - score (float): Follow probability.
+            - explanation (str): Explanation for the failure.
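+
+        Example entry (illustrative):
+            {
+                "type": "instruction_adherence_failure",
+                "source": "instruction_adherence",
+                "instruction": "Answer in plain English.",
+                "score": 0.12,
+                "explanation": "The response relied on legal jargon."
+            }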
+ """ + failed = [] + # Adherence & groundedness + for source in ["instruction_adherence", "groundedness"]: + for inst in getattr(result.detect_response, source, {}).get("instructions_list", []): + if not inst.get("label", True): + failed.append({ + "type": f"{source}_failure", + "source": source, + "instruction": inst.get("instruction", ""), + "score": inst.get("follow_probability", 0.0), + "explanation": inst.get("explanation", "") + }) + # Sort by score (most confident first) + failed.sort(key=lambda x: x["score"], reverse=True) + return failed + +def get_failed_instructions_count(result) -> int: + """ + Count all failed instructions across adherence, groundedness, and toxicity. + + Args: + result: AIMon detection result containing `instruction_adherence`, `groundedness`, and `toxicity` sections. + + Returns: + int: Total number of failed instructions. + """ + count = 0 + # Instruction adherence + for inst in result.detect_response.instruction_adherence.get("instructions_list", []): + if not inst.get("label", True): + count += 1 + # Groundedness + for inst in result.detect_response.groundedness.get("instructions_list", []): + if not inst.get("label", True): + count += 1 + count += _count_toxicity_failures(result) # Toxicity + return count + +def get_residual_error_score(result): + """ + Compute a normalized residual error score (0–1) based on: + - Groundedness follow probabilities + - Instruction adherence follow probabilities + - Toxicity (inverted: 1 - follow_probability) + + Logic: + 1. Collect follow probabilities for groundedness & adherence. + 2. For toxicity, use 1 - follow_probability (since high follow = low error). + 3. Compute a penalized average using the helper. + 4. Clamp the final score to [0,1]. + """ + combined_probs = [] + + for source in ["groundedness", "instruction_adherence"]: + combined_probs.extend([ + item["follow_probability"] + for item in getattr(result.detect_response, source, {}).get("instructions_list", []) + ]) + + # For toxicity, invert the follow probability + combined_probs.extend([ + 1 - item["follow_probability"] + for item in getattr(result.detect_response, "toxicity", {}).get("instructions_list", []) + ]) + + residual_error_score = penalized_average(combined_probs) if combined_probs else 0.0 + residual_error_score = min(1.0, max(0.0, residual_error_score)) + return round(residual_error_score, 2) + + +def penalized_average(probs: List[float]) -> float: + """ + Compute a penalized average of follow probabilities. + + Penalizes probabilities <0.5 more heavily by doubling their penalty. + Probabilities > 0.5 (passed instructions) recieve no penalty + + Args: + probs (List[float]): A list of follow probabilities. Must be non-empty. + + Returns: + float: Penalized average. Return -1 if probs is empty. 
+ """ + if not probs: # handle division by zero for empty list + return -1 + + penalties = [] + for p in probs: + if p >= 0.5: + penalty = 0 + else: + penalty = (1 - p) * 2 # heavier penalty + penalties.append(penalty) + return sum(penalties) / len(penalties) \ No newline at end of file diff --git a/tests/test_reprompting_cases.py b/tests/test_reprompting_cases.py new file mode 100644 index 0000000..b913bf6 --- /dev/null +++ b/tests/test_reprompting_cases.py @@ -0,0 +1,181 @@ +import os +import pytest +from string import Template +from aimon.reprompting_api.config import RepromptingConfig +from aimon.reprompting_api.runner import run_reprompting_pipeline + +AIMON_API_KEY = os.environ.get("AIMON_API_KEY") + +# --- Fixtures --- + +@pytest.fixture +def my_llm(): + """Mock LLM function for integration tests. Prints prompts and responses.""" + def _my_llm(recommended_prompt_template: Template, system_prompt, context, user_query) -> str: + filled_prompt = recommended_prompt_template.safe_substitute( + system_prompt=system_prompt or "", + context=context or "", + user_query=user_query or "" + ) + return filled_prompt + return _my_llm + +@pytest.fixture +def base_config(): + return RepromptingConfig( + aimon_api_key=AIMON_API_KEY, + publish=False, + return_telemetry=True, + return_aimon_summary=True, + application_name="api_test", + max_iterations=2, + ) + +@pytest.fixture +def config_without_telemetry(): + return RepromptingConfig( + aimon_api_key=AIMON_API_KEY, + publish=False, + return_telemetry=False, + return_aimon_summary=False, + application_name="api_test", + max_iterations=2, + ) + +@pytest.fixture +def config_low_latency(): + return RepromptingConfig( + aimon_api_key=AIMON_API_KEY, + publish=False, + return_telemetry=True, + return_aimon_summary=True, + application_name="api_test", + max_iterations=2, + latency_limit_ms=100 + ) + +@pytest.fixture +def config_high_latency(): + return RepromptingConfig( + aimon_api_key=AIMON_API_KEY, + publish=False, + return_telemetry=True, + return_aimon_summary=True, + application_name="api_test", + max_iterations=3, + latency_limit_ms=5000 + ) + +@pytest.fixture +def config_iteration_limit(): + return RepromptingConfig( + aimon_api_key=AIMON_API_KEY, + publish=False, + return_telemetry=True, + return_aimon_summary=True, + application_name="api_test", + max_iterations=1, + ) + +# --- Helper to print results nicely --- +def print_result(test_name, result): + print(f"\n===== RESULTS FOR: {test_name} =====", flush=True) + print("\n==== BEST RESPONSE ====", flush=True) + print(result.get("best_response"), flush=True) + print("\n==== TELEMETRY ====", flush=True) + print(result.get("telemetry"), flush=True) + print("\n==== SUMMARY ====", flush=True) + print(result.get("summary"), flush=True) + print("===== END OF RESULT =====\n", flush=True) + +# --- Tests --- + +def test_low_latency_limit(my_llm, config_low_latency): + """Test stopping behavior when latency limit is very low (100ms).""" + result = run_reprompting_pipeline( + user_query="We just received a Cal/OSHA citation for emergency workplace safety violations with a 15-day correction deadline. 
What are our options to appeal or resolve this without facing business closure?", + context="(Form DOSH-TRN-2025) [SECTION] Section 4: Consequences of Non-Compliance [SECTION] Failing to act may result in: [SECTION] - Daily penalties of $500 per violation after deadline [SECTION] - Business closure orders for willful violations [SECTION] - 300% penalty enhancements for repeat offenses [SECTION] Section 5: Recommended Resolution Path [SECTION] For fastest resolution: [SECTION] 1. Correct all hazards within 10 days [SECTION] 2. Submit Form DOSH-RESP-2025 with evidence [SECTION] 3. Request compliance verification inspection [SECTION] Remember: Appeal filings don’t suspend correction deadlines—address hazards immediately while preserving your appeal rights.[SECTION] California Business Emergency Preparedness Compliance Guide – 2025 Edition [SECTION] Document Ref: CA-BEP-2025-09 / Effective July 1, 2025 [SECTION] Section 1: Mandatory Earthquake Preparedness Protocol [SECTION] All California businesses with 10+ employees must maintain an approved earthquake readiness plan under CA Labor Code §6401.7. Don't worry—we'll walk you through each requirement step by step. [SECTION] 1. Plan Submission: [SECTION] - Complete Form BEP-22 (Earthquake Preparedness Certification) [SECTION] - Submit via CalOES Business Portal or mail to: [SECTION] Office of Emergency Services [SECTION] Business Compliance Division [SECTION] P.O. Box 419047 [SECTION] Sacramento, CA 95841 [SECTION] - Deadline: Within 30 days of plan creation or update [SECTION] 2. Employee Training: [SECTION] - Conduct quarterly drills using state-approved materials (Reference Guide BEP-TM-2025) [SECTION] - Maintain signed attendance records (Form BEP-23) for 3 years [SECTION] - New hires must complete training within 14 days of employment [SECTION] 3. Emergency Supplies: [SECTION] - Minimum 3-day water supply (1 gallon per person per day) [SECTION] - First aid kits meeting ANSI/ISEA Z308.1-2025 standards [SECTION] - Emergency lighting for all exits (tested monthly) [SECTION] - Battery-powered NOAA weather radio [SECTION] Section 2: Compliance Verification Process [SECTION] To schedule verification: [SECTION] 1. Create account on CalOES Business Portal (portal.caloes.ca.gov) [SECTION] 2. Submit Inspection Request Form BEP-30 [SECTION] 3. Pay verification fee: [SECTION] - $150 standard fee [SECTION] - $75 for businesses with 10-24 employees [SECTION] - Waived for non-profits with Form BEP-31 [SECTION] 4. Prepare for inspection: [SECTION] - Printed copy of Form BEP-22 submission receipt [SECTION] - Training records (Forms BEP-23 and BEP-23A) [SECTION] - Current supply inventory log (Form BEP-24) [SECTION] Section 3: Non-Compliance Penalties [SECTION] - First violation: $500 fine + mandatory corrective action plan (Form BEP-40) [SECTION] - Subsequent violations: $1,000 per occurrence [SECTION] - Willful neglect: Up to $5,000 + business license suspension [SECTION] - False certification: Misdemeanor charge under CA Business Code §17200 [SECTION] Section 6: Penalties for Non-Compliance [SECTION] Failure to properly report may result in: [SECTION] - Fines up to $25,000 per violation under Labor Code §6429. [SECTION] - Business license suspension for repeat offenses. [SECTION] - Potential criminal charges for willful violations causing harm. 
[SECTION] Section 7: Additional Resources [SECTION] - Cal/OSHA Spill Response Guidelines: dir.ca.gov/dosh/chemical-spills.html [SECTION] - 24/7 Emergency Consultation: 1-800-963-9424 [SECTION] - Small Business Compliance Assistance: 1-800-400-7115[SECTION] California Business Energy Efficiency Installation Guide – 2025 Edition [SECTION] ## Section 1: Understanding Your CBESP Installation Requirements [SECTION] You're asking about installing energy efficiency equipment under the CBESP program—let's walk through this step by step. The process is straightforward when you follow these guidelines. We recommend starting with a certified site assessment to maximize your benefits. [SECTION] ## Section 2: Pre-Installation Checklist [SECTION] Before installation begins: [SECTION] 1. Verify Equipment Eligibility: Check the CBESP-approved equipment list (available at cbesp.ca.gov/equipment) for your business category. For example, commercial kitchens must use NSF-certified refrigeration units. [SECTION] 2. Schedule Site Assessment: Contact a CBESP-certified assessor within 5 business days—this helps identify the best equipment placement and potential rebates. [SECTION] 3. Submit Installation Plan: Complete Form CBESP-IP-2025 with details like equipment specifications and safety protocols. [SECTION] ## Section 3: Installation Protocol [SECTION] Follow these key steps: [SECTION] 1. Power Isolation: Shut off circuits following standard safety procedures (typically 1-2 hours for most facilities). [SECTION] 2. Equipment Mounting: Use manufacturer-specified brackets with proper bracing in seismic zones. [SECTION] 3. Wiring Compliance: Follow all electrical safety requirements—consult an electrician if unsure. [SECTION] ## Section 4: Post-Installation Process [SECTION] 1. Functional Testing: Run diagnostic cycles for 24-48 hours to confirm everything works properly. [SECTION] 2. Documentation: Submit Form ICF-2025 within 3 business days of completion. [SECTION] 3. Inspector Scheduling: Request CBESP verification within 14 calendar days to avoid reinspection fees. [SECTION] ## Section 5: Important Deadlines [SECTION] - Assessment scheduling: Within 5 business days of equipment purchase [SECTION] - Form submission: 3 business days post-installation [SECTION] - Verification request: Within 14 calendar days of completion [SECTION] ## Section 6: Non-Compliance Consequences [SECTION] Not following procedures may result in:SECTION] 3. Referral to the California Franchise Tax Board for collection [SECTION] Section 5: Urgent Support Resources [SECTION] For time-sensitive applications: [SECTION] 1. Call the Disaster Relief Hotline: 1-800-CA-BIZ-AID (option 2 for expedited processing) [SECTION] 2. Email emergency@ca-bfa.gov with subject line URGENT: [Your Business Name] [SECTION] 3. Visit designated disaster recovery centers (list at ca-bfa.gov/locations) [SECTION] Note: Applications submitted after the 14-day window will be processed under standard timelines (4–6 weeks).[SECTION] California Business Licensing Compliance Guide – 2025 Edition [SECTION] Document Ref: CA-BLC-2025-12 / Effective July 1, 2025 [SECTION] Section 1: Renewal Notice Verification [SECTION] 1. Submit Form BLC-ADDR-2025 through our online portal or by mail to verify your address on file. Processing takes 3 business days. [SECTION] 2. If your address is correct but the notice hasn't arrived, request a duplicate notice by calling 1-800-CA-BIZLIC or visiting any Regional Business License Center. [SECTION] 3. 
For urgent renewals, submit Form BLC-RENEW-2025 with a written explanation of the missing notice. [SECTION] Section 2: Required Renewal Documentation [SECTION] To complete your renewal, you must provide: [SECTION] 1. Form BLC-RENEW-2025 with Sections A, C, and E completed [SECTION] 2. Your current business license number from prior correspondence [SECTION] 3. Proof of local zoning compliance (Form ZON-2025 or equivalent) [SECTION] Section 3: Late Renewal Procedures [SECTION] Renewals submitted within 30 days of expiration require: [SECTION] 1. Payment of $50 late fee via Form BLC-LATE-2025 [SECTION] 2. Submission of all standard renewal documents [SECTION] 3. Written justification for delay [SECTION] After 30 days, you must submit Form BLC-NEW-2025 for full reapplication. [SECTION] Section 4: Operating Without Renewal [SECTION] Continuing business operations with an expired license may result in: [SECTION] 1. Fines of $500 per violation [SECTION] 2. Mandatory compliance review (Form BLC-COMP-2025) [SECTION] 3. Potential license suspension for repeated violations", + llm_fn=my_llm, + reprompting_config=config_low_latency, + user_instructions=["Use sentence structures that sound natural in spoken English, avoiding overly formal or stilted constructions.","Do not suggest actions that bypass or undermine official company processes (e.g., “You could just submit a new request under a different name”); mentioning formal appeal or exception procedures is acceptable if supported by context."] + ) + print_result("Low Latency Limit Test (100ms)", result) + assert "best_response" in result + +def test_latency_limit(my_llm, config_high_latency): + """Test behavior with a high latency limit and contradictory instructions.""" + result = run_reprompting_pipeline( + user_query="I manage data compliance for a healthcare startup using CloudSync's Enterprise tier. We need to ensure all patient-related files are automatically deleted after 7 years to meet HIPAA requirements, while maintaining the ability to recover accidentally deleted files for at least 30 days. Can CloudSync support this workflow, and what configuration changes would we need to make?", + context="[SECTION] # Data Retention and Privacy Policy for CloudSync Services [SECTION] ## Overview [SECTION] This document outlines the data retention, storage, and privacy practices for CloudSync, a cloud-based file synchronization service. It applies to all users of the Free, Pro, and Enterprise tiers unless superseded by a signed Enterprise Agreement. [SECTION] ## Data Retention Periods [SECTION] - **Active Accounts:** User data is retained indefinitely unless manually deleted by the user or via automated compliance workflows (e.g., legal hold expiration). [SECTION] - **Inactive Accounts:** Accounts with no login activity for 24 months are flagged for review. After 30 days in review status, data is permanently deleted unless an extension is requested via Form CS-102. [SECTION] - **Deleted Files:** Files removed from the trash bin are retained in a recoverable state for 30 days. After this period, they are irreversibly purged from all systems. 
[SECTION] ## Privacy Controls [SECTION] Users can configure: [SECTION] - Automatic deletion of files older than a specified age (1, 3, or 5 years) [SECTION] - Geographic restrictions for data storage (US, EU, or APAC regions) [SECTION] - Access logs retention (default 90 days, extendable to 365 days for Enterprise) [SECTION] ## Legal and Compliance [SECTION] - Data may be retained beyond standard periods if subject to litigation hold, subpoena, or regulatory investigation. Users will be notified unless prohibited by law. [SECTION] - Third-party audits are conducted annually for SOC 2 Type II compliance. Reports available upon request for Enterprise customers. [SECTION] - Data sovereignty requirements are enforced at the storage layer; metadata may transit through global networks for synchronization purposes. [SECTION] ## Data Recovery [SECTION] - Enterprise accounts can request point-in-time recovery for entire accounts (14-day window) or individual files (30-day window). [SECTION] - Recovery operations incur a $150 base fee plus $0.03/GB for data restored. [SECTION] ## Policy Changes [SECTION] - Notice of material changes will be provided 60 days in advance via email and in-app notifications. [SECTION] - Continued use of the service after changes take effect constitutes acceptance of the new terms.", + llm_fn=my_llm, + reprompting_config=config_high_latency, + user_instructions=["Do not use the letter e", "Only use the letter e"] + ) + print_result("High Latency Limit Test (5000ms)", result) + assert "best_response" in result + +def test_iteration_limit(my_llm, config_iteration_limit): + """Test behavior when max_iterations is 1.""" + result = run_reprompting_pipeline( + user_query="Our systems are showing vulnerability alerts but we can't find the patch file in the vendor portal. What should we do?", + system_prompt="Keep the tone professionally neutral by avoiding emotionally charged words, exclamations, or informal phrases (e.g., awesome, ugh, or emojis)", + context="[SECTION] # Emergency Software Patch Installation Guide [SECTION] ## Critical Security Patch Notification – CVE-2025-1234 [SECTION] This notice applies to all systems running versions prior to 3.2.1. Immediate action is required under Cybersecurity Policy CP-2025-07 to address authentication vulnerabilities. [SECTION] ## Patch Acquisition Procedures [SECTION] 1. Verify your system meets the requirements using Form LIC-VER-15 [SECTION] 2. Access the Emergency Patch Portal at epp.vendor.com/alert/CVE-2025-1234 [SECTION] 3. If the patch isn't available, submit Form PATCH-REQ-22 for manual distribution [SECTION] ## Installation Process [SECTION] We understand urgent updates can be stressful—here's how to proceed safely: [SECTION] 1. First, create a complete system backup [SECTION] 2. Run the patch installer with administrator privileges [SECTION] 3. 
Check the system logs to confirm successful installation [SECTION] ## Post-Installation Steps [SECTION] - Submit Form EP-ACK-22 within 24 hours [SECTION] - Retain installation records for 90 days [SECTION] - Schedule a security scan within 7 days [SECTION] ## Support Options [SECTION] - 24/7 Technical Support: 1-800-PATCH-HELP [SECTION] - Priority assistance: Submit Form IRF-89 with CRITICAL flag", + llm_fn=my_llm, + reprompting_config=config_iteration_limit, + user_instructions=["do not use the letter e","only use the letter e"] + ) + print_result("Iteration Limit Test (no re-prompting, only 1 iteration allowed)", result) + assert "best_response" in result + +def test_empty_context_and_instructions(my_llm, base_config): + """Ensure pipeline works with no context, instructions, or system prompt.""" + result = run_reprompting_pipeline( + user_query="Testing with empty context, instructions, and system prompt", + context="", + llm_fn=my_llm, + reprompting_config=base_config, + user_instructions=[] + ) + print_result("Empty Context & Instructions Test", result) + assert "best_response" in result + +def test_no_telemetry(my_llm, config_without_telemetry): + """Confirm telemetry and summary are excluded when disabled in config.""" + result = run_reprompting_pipeline( + user_query="I keep getting 401 errors when trying to connect to your API. What should I check first?", + context="[SECTION] API Integration Troubleshooting Guide – Version 2.1 [SECTION] Document Ref: API-TS-2025-07 / Issued March 2025 [SECTION] Step 1: Verify Authentication Details [SECTION] To resolve 401 errors, first check these key items: [SECTION] - Ensure your API key has exactly 32 characters [SECTION] - Confirm the key is active in your Developer Portal account [SECTION] - Check that your IP address is whitelisted if required [SECTION] Step 2: Gather Required Information [SECTION] For support cases, prepare: [SECTION] - Screenshot of the error message [SECTION] - Recent API call logs [SECTION] - Your account ID and integration details [SECTION] Step 3: Submit Support Request [SECTION] You can submit your request through: [SECTION] - The Developer Portal ticket system (fastest response) [SECTION] - Email to api-support@company.com with 401 Error in subject [SECTION] Typical response time is 2 business days. [SECTION] Step 4: After Resolution [SECTION] Once fixed: [SECTION] - Update your integration settings [SECTION] - Keep records of the troubleshooting process [SECTION] For immediate help: [SECTION] Call 1-800-API-HELP (24/7 for Priority customers) [SECTION] Email: api-support@company.com[SECTION] API Rate Limit and Throttling Policy – 2025 Update [SECTION] Document Ref: API-POL-2025-07 / Effective March 2025 [SECTION] Section 1: Standard Rate Limits [SECTION] The following rate limits apply to all API endpoints unless otherwise specified in your service tier agreement: [SECTION] - Free Tier: 100 requests per minute, 1,000 requests per day [SECTION] - Business Tier: 500 requests per minute, 10,000 requests per day [SECTION] - Enterprise Tier: Custom limits negotiated per contract [SECTION] Section 2: Throttling Behavior [SECTION] When limits are exceeded: [SECTION] 1. First violation: API returns HTTP 429 (Too Many Requests) with Retry-After header [SECTION] 2. Subsequent violations within 24 hours: Temporary suspension for 1 hour [SECTION] 3. 
Chronic violations (3+ in 7 days): Account review and potential permanent rate reduction [SECTION] Section 3: Best Practices for Avoiding Throttling [SECTION] To maintain optimal API performance: [SECTION] 1. Implement exponential backoff when receiving 429 responses [SECTION] 2. Cache responses where possible (ETag headers supported on all GET endpoints) [SECTION] 3. Use batch endpoints instead of individual calls for bulk operations [SECTION] 4. Monitor usage via the X-RateLimit-Remaining header [SECTION] Section 4: Consequences of Policy Violations [SECTION] Repeated throttling may result in: [SECTION] 1. Temporary API key revocation [SECTION] 2. Mandatory migration to higher service tier [SECTION] 3. Suspension of account privileges pending review [SECTION] Section 5: Monitoring and Alerts [SECTION] Configure usage alerts through: [SECTION] 1. Dashboard notifications (available in Account Settings) [SECTION] 2. Webhook integrations (documented in API Guide Section 12.4) [SECTION] 3. Email warnings at 75% and 90% of daily limits[SECTION] API Rate Limit and Throttling Policy – Version 2.1 [SECTION] Effective Date: March 2025 [SECTION] This document outlines the rate limits, throttling policies, and escalation procedures for the Enterprise API tier. All API calls are subject to these limits unless otherwise specified in a signed Service Level Agreement (SLA). [SECTION] ## Rate Limit Tiers [SECTION] - **Standard Tier:** 1,000 requests per minute (RPM) across all endpoints [SECTION] - **High-Capacity Tier:** 5,000 RPM, available for an additional $200/month fee [SECTION] - **Burst Capacity:** Temporary spikes up to 2x your tier limit for 5-minute intervals, max twice per hour [SECTION] ## Throttling Behavior [SECTION] When limits are exceeded: [SECTION] - First violation: API returns HTTP 429 with Retry-After header (typically 30 seconds) [SECTION] - Repeated violations within 1 hour: 15-minute cool-down period enforced [SECTION] - Chronic violations (3+ incidents/day): Account review and potential downgrade to Standard Tier [SECTION] ## Urgent Limit Increase Process [SECTION] Follow these steps to resolve your rate limit issue: [SECTION] 1. **Submit Request:** Log into the API Dashboard and navigate to Manage Quotas [SECTION] 2. **Select Priority:** Choose Urgent and provide: [SECTION] - Business justification for the increase [SECTION] - Expected call volume during peak hours [SECTION] - Required duration (maximum 72 hours) [SECTION] 3. **Payment:** Submit the $75 expedited processing fee [SECTION] 4. **Activation:** Approved increases take effect within 2 hours of submission [SECTION] ## Consequences of Not Following Process [SECTION] - Unapproved workarounds may trigger account suspension [SECTION] - Repeated urgent requests may require SLA upgrade [SECTION] - All limit changes are logged and audited [SECTION] ## Recommended Action [SECTION] For immediate relief while waiting for approval: [SECTION] - Implement client-side retry logic with exponential backoff [SECTION] - Distribute calls across multiple API keys if available [SECTION] - Schedule non-critical requests during off-peak hours [SECTION] ## Support Resources [SECTION] - Emergency Support: api-support@example.com (Subject: URGENT - Rate Limit) [SECTION] - Real-time Monitoring: api.example.com/status [SECTION] - Documentation: docs.example.com/api/rate-limits [SECTION] ## Note on Eligibility [SECTION] Temporary increases are granted based on system capacity and historical usage patterns.
Approval is not guaranteed for accounts with frequent violation history.[SECTION] API Rate Limit and Throttling Policy – 2025 Update [SECTION] Document Ref: API-POL-2025-07 / Effective March 2025 [SECTION] Section 1: Standard Rate Limits [SECTION] The following rate limits apply to all API endpoints unless otherwise specified in your service tier agreement: [SECTION] - Free Tier: 100 requests per minute, 1,000 requests per day [SECTION] - Business Tier: 500 requests per minute, 10,000 requests per day [SECTION] - Enterprise Tier: Custom limits negotiated per contract [SECTION] Section 2: Throttling Behavior [SECTION] When limits are exceeded: [SECTION] 1. First violation: API returns HTTP 429 (Too Many Requests) with Retry-After header [SECTION] 2. Subsequent violations within 24 hours: Temporary suspension for 1 hour [SECTION] 3. Chronic violations (3+ in 7 days): Account review and potential permanent rate reduction [SECTION] Section 3: Best Practices for Avoiding Throttling [SECTION] To maintain optimal API performance: [SECTION] 1. Implement exponential backoff when receiving 429 responses [SECTION] 2. Cache responses where possible (ETag headers supported on all GET endpoints) [SECTION] 3. Use batch endpoints instead of individual calls for bulk operations [SECTION] 4. Monitor usage via the X-RateLimit-Remaining header [SECTION] Section 4: Consequences of Policy Violations [SECTION] Repeated throttling may result in: [SECTION] 1. Temporary API key revocation [SECTION] 2. Mandatory migration to higher service tier [SECTION] 3. Suspension of account privileges pending review [SECTION] Section 5: Monitoring and Alerts [SECTION] Configure usage alerts through: [SECTION] 1. Dashboard notifications (available in Account Settings) [SECTION] 2. Webhook integrations (documented in API Guide Section 12.4) [SECTION] 3. Email warnings at 75% and 90% of daily limits[SECTION] API Rate Limit Enforcement Policy – California Department of Technology (2025 Revision) [SECTION] Document Ref: CDT-API-2025-09 / Effective March 1, 2025 [SECTION] Section 1: Rate Limit Thresholds [SECTION] All API endpoints enforce the following limits per client IP: [SECTION] - Standard Tier: 100 requests per minute [SECTION] - Elevated Tier: 500 requests per minute (requires Form API-T2 submitted 5 business days in advance) [SECTION] - Emergency Tier: 1,000 requests per minute (requires Form API-EMG with justification; valid for 72 hours) [SECTION] Section 2: Violation Consequences [SECTION] Exceeding rate limits triggers these automated responses: [SECTION] 1. First violation: HTTP 429 response with Retry-After header (60 seconds) [SECTION] 2. Second violation within 24 hours: 15-minute suspension [SECTION] 3. Third violation within 7 days: Account review and potential permanent blacklisting [SECTION] Section 3: Immediate Resolution Steps [SECTION] If your API access is suspended: [SECTION] 1. Check your request logs for spikes using CDT API Monitor (Form API-MON required for access) [SECTION] 2. Implement exponential backoff with jitter in your client code [SECTION] 3. Submit Form API-RES with: [SECTION] a. Incident timeline [SECTION] b. Corrective action plan [SECTION] c. Client IP ranges needing whitelisting [SECTION] Section 4: Permanent Blacklist Appeals [SECTION] To contest a blacklisting decision: [SECTION] 1. File Form API-APL within 10 business days [SECTION] 2. Provide technical documentation proving compliance with: [SECTION] a. California Code §11546.45 (API Fair Use) [SECTION] b. 
CDT Technical Bulletin 2025-7 (Throttling Best Practices) [SECTION] 3. Await review by the API Governance Board (5-10 business days) [SECTION] Note: Emergency service applications may qualify for expedited review if submitting Form API-EMG with wildfire response documentation.[SECTION] API Rate Limit Policy – Enterprise Developer Portal [SECTION] Document Ref: API-POL-2025-03 / Effective June 2025 [SECTION] Section 1: Understanding Your Rate Limit Issue [SECTION] Your application hitting rate limits during peak hours is a common challenge. Let's walk through the steps to resolve this while maintaining API access. [SECTION] Section 2: Immediate Actions [SECTION] 1. Review your API-LOG-78 reports to identify: [SECTION] * Which endpoints are exceeding limits [SECTION] * Time patterns of high traffic [SECTION] 2. Implement request throttling per Technical Bulletin API-TB-104 [SECTION] 3. For temporary relief, submit Form API-RL-22 for a 72-hour limit increase [SECTION] Section 3: Long-Term Solutions [SECTION] To permanently increase your rate limits: [SECTION] 1. Submit Form API-HC-45 with: [SECTION] * Business justification for higher limits [SECTION] * 30 days of API-LOG-78 reports [SECTION] 2. Our team will review within 5 business days [SECTION] Section 4: Best Practices [SECTION] * Spread high-volume requests evenly throughout the day [SECTION] * Cache responses where possible [SECTION] * Consider upgrading to Enterprise Tier (5,000 RPM) [SECTION] Section 5: What to Avoid [SECTION] Repeated violations may lead to: [SECTION] * Temporary API key suspension [SECTION] * Mandatory compliance review (Form API-COMP-19) [SECTION] * Service tier downgrade [SECTION] Section 6: Recommended Next Steps [SECTION] We recommend starting with Form API-RL-22 for immediate relief while preparing your materials for a permanent tier upgrade. Our support team is available to review your API-LOG-78 reports if needed.[SECTION] API Service Level Agreement – Version 2.1 [SECTION] Effective Date: March 15, 2025 [SECTION] Section 1: Service Availability Standards [SECTION] The API maintains 99.9% monthly uptime excluding scheduled maintenance. Downtime incidents exceeding 30 consecutive minutes qualify for service credit compensation under Section 7. [SECTION] Section 2: Incident Reporting Protocol [SECTION] To report 503 errors: [SECTION] 1. Document the incident with: [SECTION] - Exact timestamps (UTC) [SECTION] - Affected endpoint URLs [SECTION] - HTTP response headers [SECTION] 2. Complete Form API-INC-2025 (available in Developer Portal > Support) [SECTION] - Attach redacted error logs [SECTION] - Include business impact assessment [SECTION] 3. Submit within 60 minutes of first occurrence for priority handling [SECTION] Section 3: Investigation Timeline [SECTION] Upon submission: [SECTION] 1. Initial response within 15 minutes (email confirmation with Case ID) [SECTION] 2. Severity assessment using Priority Matrix API-PM-2025 within 30 minutes [SECTION] 3. Hourly status updates posted to Case ID portal [SECTION] Section 4: Resolution Procedures [SECTION] For confirmed outages: [SECTION] 1. Emergency patch deployment within 4 hours for Severity 1 incidents [SECTION] 2. Full root cause analysis report within 3 business days [SECTION] 3. 
Post-mortem review available upon request (Form API-PM-2025) [SECTION] Section 5: Compensation Policy [SECTION] Service credits apply as follows: [SECTION] - 5% of monthly fee for 30-59 minutes downtime [SECTION] - 10% for 1-2 hours [SECTION] - 15% for 2-4 hours [SECTION] Note: Credits require submission of Form API-CR-2025 within 7 calendar days. [SECTION] Section 6: Penalties for False Reports [SECTION] Misrepresented incidents may result in: [SECTION] - Suspension of incident reporting privileges for 30 days [SECTION] - Forfeiture of accrued service credits [SECTION] - Administrative fee of $250 per invalid claim [SECTION] Section 7: Emergency Contacts [SECTION] For unresolved Severity 1 incidents after 2 hours: [SECTION] 1. Primary: api-emergency@company.com (monitored 24/7) [SECTION] 2. Secondary: +1-800-555-API1 (follow voice prompts for engineer dispatch) [SECTION] 3. Escalation: File Form API-ESC-2025 with VP of Engineering CC [SECTION] Section 8: Preventive Measures [SECTION] Recommended best practices: [SECTION] - Implement exponential backoff with jitter [SECTION] - Monitor status.company.com for real-time updates [SECTION] - Maintain fallback endpoints per API-FB-2025 guidelines[SECTION] API Usage and Rate Limit Policy – Version 2025.1 [SECTION] We understand hitting rate limits can disrupt your workflow—let's review your options to resolve this. [SECTION] SECTION 1: CURRENT LIMITS AND UPGRADE PATHS [SECTION] Your Free Tier currently allows: [SECTION] - 100 requests per minute [SECTION] - 5,000 requests per day [SECTION] To increase these limits, consider: [SECTION] Option 1: Basic Tier ($49/month) [SECTION] - 500 requests per minute [SECTION] - 25,000 requests per day [SECTION] - Immediate activation via Developer Portal > Billing [SECTION] Option 2: Enterprise Tier [SECTION] - Custom limits tailored to your needs [SECTION] - Requires signed agreement (Form API-ENT-2025) [SECTION] - Contact sales@company.com for consultation [SECTION] SECTION 2: TEMPORARY LIMIT INCREASE [SECTION] If you need short-term relief: [SECTION] 1. Submit Form API-LIMIT-REQ through Developer Portal [SECTION] 2. Provide: [SECTION] - Technical justification (e.g., expected traffic spikes) [SECTION] - Duration needed (max 30 days) [SECTION] 3. 
Typical approval time: 2 business days [SECTION] SECTION 3: AVOIDING FUTURE ISSUES [SECTION] Best practices we recommend: [SECTION] - Implement exponential backoff (sample code in Dev Docs Section 4.3) [SECTION] - Cache responses using ETag headers [SECTION] - Monitor usage via Dashboard > API Analytics [SECTION] SECTION 4: IMPORTANT NOTES [SECTION] - Repeated violations (5+/month) may trigger account review [SECTION] - Emergency after-hours support: Submit Priority Ticket with 'Rate Limit' tag [SECTION] - Full policy details available in Document API-POL-2025.1 [SECTION] SECTION 5: IMMEDIATE HELP [SECTION] For urgent assistance: [SECTION] - Live chat: Developer Portal > Support [SECTION] - Phone: 1-800-555-API1 (Mon-Fri 9AM-5PM ET) [SECTION] Remember: Our team is here to help you scale efficiently while maintaining system stability for all users", + llm_fn=my_llm, + reprompting_config=config_without_telemetry, + user_instructions=["Keep the tone professionally neutral by avoiding emotionally charged words, exclamations, or informal phrases (e.g., awesome, ugh, or emojis)","Provide a detailed, ordered explanation of a process with at least three sequential steps.","Avoid expressions of uncertainty about company policies and eliminate vague or speculative phrases (e.g., “I think we cover that”)."] + ) + print_result("No Telemetry Test", result) + assert "telemetry" not in result + assert "summary" not in result + +def test_no_system_prompt(my_llm, base_config): + """Test behavior when system prompt is excluded.""" + result = run_reprompting_pipeline( + user_query="What penalties can credit bureaus face if they don’t fix errors on my report, and how do I make sure they take my dispute seriously?", + context="[SECTION] California Credit Reporting & Dispute Resolution Policy – 2025 Update [SECTION] Document Ref: CA-CRDP-2025-09 / Effective January 1, 2025 [SECTION] Section 1: Credit Bureau Responsibilities [SECTION] Credit reporting agencies must correct errors on your report within 30 days. If they fail to do so, they may face penalties under state and federal law, including fines and required corrections. [SECTION] Section 2: How to File a Dispute [SECTION] Follow these steps to ensure your dispute is processed correctly: [SECTION] 1. Get your credit reports from all three bureaus using AnnualCreditReport.com or Form CR-REQUEST-2025 [SECTION] 2. Complete Form CA-DISPUTE-9, available on the DFPI website, including: [SECTION] • Your personal information in Section 3A [SECTION] • Details about each error in Section 4B [SECTION] • Supporting documents like bank statements [SECTION] 3. Submit your dispute online through the secure portal or by certified mail [SECTION] Section 3: What Happens Next [SECTION] After you file: [SECTION] 1. You'll receive a confirmation letter within 5 business days [SECTION] 2. The bureau will investigate and send results within 30 days [SECTION] 3. They must either correct the error, verify it's accurate, or remove the item [SECTION] Section 4: If the Error Isn't Fixed [SECTION] If the bureau doesn't correct a verified error: [SECTION] 1. File a complaint with the DFPI within 60 days using Form DFPI-CR-7 [SECTION] 2. Contact the Federal Trade Commission for assistance [SECTION] 3. 
You may have the right to take legal action [SECTION] Section 5: Getting Help Quickly [SECTION] For urgent situations like mortgage applications: [SECTION] • Call the DFPI Dispute Hotline at 1-800-555-REPORT (option 2) [SECTION] • Submit Form CR-EXPEDITE with proof of urgency [SECTION] • Visit a DFPI office by appointment [SECTION] Note: There may be a $25 fee if you dispute the same item more than twice. Medical debt disputes require extra documentation.[SECTION] California Credit Reporting Compliance Policy – FCRA Section 605 Enforcement [SECTION] Document Ref: CRCP-2025-09 / Effective Immediately [SECTION] Section 1: FCRA Violation Penalties Under California Law [SECTION] The Fair Credit Reporting Act (FCRA) violations in California are subject to the following penalties: [SECTION] - **Incorrect Reporting (FCRA §605(a)):** $2,500 per violation, with additional civil penalties up to $10,000 for willful non-compliance. [SECTION] - **Failure to Investigate Disputes (FCRA §611):** Mandatory $1,000 penalty per unresolved dispute, plus actual damages if litigation ensues. [SECTION] - **Unauthorized Access (FCRA §604):** Statutory damages of $3,000 per instance, plus potential criminal charges under California Penal Code 502(c). [SECTION] Section 2: Mandatory Corrective Actions [SECTION] Upon identification of an FCRA violation, regulated entities must: [SECTION] 1. Submit Form CR-25 (Credit Reporting Correction Notice) to the California Department of Financial Protection and Innovation (DFPI) within 5 business days. [SECTION] 2. Provide corrected information to all affected consumers via certified mail (Form CR-30) within 10 business days. [SECTION] 3. File an attestation of compliance (Form CR-35) with the CFPB within 15 business days, including: [SECTION] - Copies of corrected consumer reports [SECTION] - Proof of consumer notification [SECTION] - Internal audit documentation [SECTION] Section 3: Consumer Eligibility for Remedies [SECTION] Consumers may file claims if they meet these criteria: [SECTION] - Demonstrated financial harm via bank statements showing denied credit applications or increased interest rates. [SECTION] - Timely submission of Form CR-40 (Consumer Dispute Affidavit) within 60 days of violation discovery. [SECTION] - Documentation of at least two unsuccessful dispute attempts with the credit bureau (retain copies of Form CR-45). [SECTION] Section 4: Enforcement Timeline [SECTION] - **Immediate Actions (0-5 days):** Credit bureaus must place fraud alerts or security freezes upon request (Form CR-50). [SECTION] - **Investigation Phase (30 days):** Regulated entities must complete dispute investigations per FCRA §611(a)(1). [SECTION] - **Remediation Deadline (45 days):** All corrections must be reflected in consumer reports by this date. [SECTION] Section 5: Consequences of Non-Compliance [SECTION] Failure to adhere to these requirements results in: [SECTION] - Automatic referral to the California Attorney General's Office for enforcement action. [SECTION] - Suspension of credit reporting privileges for 90 days (extendable to 180 days for repeat violations). [SECTION] - Mandatory participation in quarterly FCRA compliance audits for two years. 
[SECTION] Section 6: Contact Information [SECTION] For compliance questions or dispute submissions: [SECTION] DFPI Credit Reporting Division: 1-800-555-7890 [SECTION] Email: cr.compliance@dfpi.ca.gov [SECTION] Overnight Mail: DFPI – CR Unit, 1500 11th Street, Sacramento, CA 95814[SECTION] California Credit Reporting Policy – 2025 Consumer Rights Update [SECTION] Document Ref: CCRP-2025-09 / Effective March 2025 [SECTION] Section 1: Requesting Your Credit Report [SECTION] California residents may request free credit reports under the following conditions: [SECTION] - **Annual Request**: One free report per year from each major bureau (Equifax, Experian, TransUnion) via AnnualCreditReport.com or Form CR-22. [SECTION] - **Additional Requests**: Free reports are available if: [SECTION] * You’ve been denied credit, employment, or housing within the last 60 days (submit denial letter with Form CR-25) [SECTION] * You’re a victim of identity theft (submit police report or FTC affidavit with Form CR-28) [SECTION] * You’re unemployed and plan to seek employment within 90 days (submit unemployment verification with Form CR-30) [SECTION] Section 2: Disputing Errors [SECTION] To dispute inaccuracies on your credit report: [SECTION] 1. **Document the Error**: Identify the incorrect item and gather supporting evidence (e.g., bank statements, payment confirmations). [SECTION] 2. **Submit Dispute**: File online through the credit bureau’s dispute portal or mail Form CR-40 with: [SECTION] * Copy of your credit report with errors circled [SECTION] * Proof of identity (CA driver’s license or state ID) [SECTION] * Supporting documentation [SECTION] 3. **Await Investigation**: Bureaus must respond within 30 days (45 days if submitting additional evidence later). [SECTION] Section 3: HIPAA-Related Medical Debt Reporting [SECTION] Under HIPAA Privacy Rule §164.528, medical debt reporting must comply with: [SECTION] - **Consent Requirement**: Providers must obtain written consent (Form HIPAA-15) before reporting medical debt to credit bureaus. [SECTION] - **Dispute Process**: If medical debt appears without consent, submit Form CR-45 to the bureau with a copy of the unsigned HIPAA-15. [SECTION] - **Removal Timeline**: Unauthorized medical debt must be removed within 5 business days of dispute receipt. [SECTION] Section 4: Contact Information [SECTION] For credit report assistance in California: [SECTION] - **Equifax**: 1-800-685-1111 | PO Box 740241, Atlanta, GA 30374 [SECTION] - **Experian**: 1-888-397-3742 | PO Box 4500, Allen, TX 75013 [SECTION] - **TransUnion**: 1-800-916-8800 | PO Box 2000, Chester, PA 19016 [SECTION] - **CA Attorney General’s Office**: 1-800-952-5225 | credit.reports@doj.ca.gov [SECTION] - **HIPAA Complaints**: Submit Form OCR-200 to the U.S. Department of Health and Human Services [SECTION] Section 5: Penalties for Non-Compliance [SECTION] Violations of credit reporting laws may result in: [SECTION] - **Bureau Penalties**: $2,500 per willful violation under the FCRA [SECTION] - **Provider Penalties**: Up to $50,000 per HIPAA violation for unauthorized medical debt reporting [SECTION] - **Consumer Remedies**: Actual damages plus attorney fees for successful lawsuits [SECTION] Note: Retain copies of all correspondence for at least 3 years. 
Dispute status can be checked online or by calling the bureau’s toll-free number.", + llm_fn=my_llm, + reprompting_config=base_config, + user_instructions=["use the letter e only", "do not use the letter e"] + ) + print_result("No System Prompt Test", result) + assert "best_response" in result + +@pytest.mark.integration +def test_with_system_prompt(my_llm, base_config): + """Test behavior when a system prompt is explicitly provided.""" + result = run_reprompting_pipeline( + user_query="Can I dispute my background check results if they’re wrong?", + context="[SECTION] California Employment Background Check Dispute Process – FCRA Compliance [SECTION] Effective: July 2025 / Policy Ref: BGC-2025-07 [SECTION] This document outlines the formal dispute procedure for inaccurate background check reports under Fair Credit Reporting Act (FCRA) §611 and California Civil Code §1786.16. [SECTION] SECTION 1: ELIGIBILITY REQUIREMENTS [SECTION] You may dispute your background check report if: [SECTION] - The report contains factual errors (e.g., incorrect employment dates, misattributed criminal records, or expired violations) [SECTION] - The disputed information falls within FCRA's 7-year reporting period (10 years for positions with salaries exceeding $125,000 annually) [SECTION] - You have not previously disputed the same item within the past 12 months [SECTION] SECTION 2: DISPUTE SUBMISSION PROCESS [SECTION] Step 1: Documentation Preparation [SECTION] - Complete Form BGC-2025-D (Dispute Request) with: [SECTION] * Notarized statement of inaccuracy [SECTION] * Supporting evidence (pay stubs, court disposition forms, or government-issued ID) [SECTION] * For healthcare positions: current medical license (Form MC-114) and malpractice insurance verification [SECTION] Step 2: Submission Methods [SECTION] - Secure Online Portal: Upload documents at bgcdispute.ca.gov (Case Type 45B) [SECTION] - Certified Mail: Send to California Background Check Bureau, PO Box 980, Sacramento, CA 95812 [SECTION] - In-Person: Submit at any California Department of Consumer Affairs office with appointment (Form APT-2025 required) [SECTION] Step 3: Processing Timeline [SECTION] - Acknowledgement issued within 3 business days via Form BGC-2025-R [SECTION] - Investigation completed within 30 calendar days (45 days for complex cases involving multiple jurisdictions) [SECTION] - Corrected reports distributed to you and requesting employer within 5 business days of resolution [SECTION] SECTION 3: CONSEQUENCES OF NON-COMPLIANCE [SECTION] - Incomplete submissions will be rejected and require re-filing (Form BGC-2025-RF) with $25 processing fee [SECTION] - Frivolous disputes (more than 3 in 12 months) may result in 6-month filing suspension [SECTION] - Employment applications may be automatically denied if dispute isn't resolved before employer's decision deadline [SECTION] SECTION 4: EXAMPLE SCENARIOS [SECTION] Example 1: A report lists a dismissed misdemeanor from 2022. Submit Form BGC-2025-D with court dismissal paperwork (Form CR-180) for automatic removal. [SECTION] Example 2: For incorrect drug test results, include lab retest documentation (Form DT-114) and chain-of-custody records. 
[SECTION] SECTION 5: CONTACT INFORMATION [SECTION] For assistance: [SECTION] - Phone: 1-800-555-1234 (Monday-Friday, 8 AM to 5 PM PST) [SECTION] - Email: bgc.disputes@ca.gov (Response within 2 business days) [SECTION] - In-Person: Schedule appointments using Form APT-2025 at approved locations [SECTION] Note: All disputes are subject to audit under FCRA §609 and may require additional verification. Fraudulent submissions may result in legal action under California Penal Code §532.", + llm_fn=my_llm, + reprompting_config=base_config, + user_instructions=["use the letter e only", "do not use the letter e", "use a neutral tone"], + system_prompt="this is a system prompt" + ) + print_result("With System Prompt Test", result) + assert "best_response" in result + assert "telemetry" in result + assert "summary" in result diff --git a/tests/test_reprompting_failures.py b/tests/test_reprompting_failures.py new file mode 100644 index 0000000..df5ca9b --- /dev/null +++ b/tests/test_reprompting_failures.py @@ -0,0 +1,110 @@ +import os +import pytest +from string import Template +import aimon +from aimon.reprompting_api.config import RepromptingConfig +from aimon.reprompting_api.runner import run_reprompting_pipeline + +AIMON_API_KEY = os.environ.get("AIMON_API_KEY") + +# --- MOCKED LLM FUNCTIONS --- +def my_llm(prompt_template: Template, system_prompt=None, context=None, user_query=None) -> str: + """Simulates a normal working LLM that returns a string response. Just returns filled_prompt for test""" + filled_prompt = prompt_template.safe_substitute( + system_prompt=system_prompt or "", + context=context or "", + user_query=user_query or "" + ) + return filled_prompt + +def llm_fn_failure(prompt_template: Template, system_prompt=None, context=None, user_query=None) -> str: + """Simulates an LLM call that fails every time.""" + raise RuntimeError("LLM call failed intentionally for testing") + +def llm_fn_incorrect_return_value(prompt_template: Template, system_prompt=None, context=None, user_query=None): + """Simulates an LLM that returns an invalid type instead of a string.""" + return 42 + +# --- MOCKED CONFIG FACTORIES --- +def get_config(): + """Returns a valid base configuration for most tests.""" + return RepromptingConfig( + aimon_api_key=AIMON_API_KEY, + publish=False, + return_telemetry=True, + return_aimon_summary=True, + application_name="api_test", + max_iterations=2, + ) + +def get_config_with_invalid_aimon_api_key(): + """Returns a config with an intentionally invalid AIMon API key.""" + return RepromptingConfig( + aimon_api_key="invalid key", + publish=False, + return_telemetry=True, + return_aimon_summary=True, + application_name="api_test", + max_iterations=3, + ) + +# --- TESTS EXPECTING FAILURES --- +def test_llm_failure(): + """Should raise RuntimeError when the LLM function always fails.""" + config = get_config() + with pytest.raises(RuntimeError, match="LLM call failed intentionally for testing"): + run_reprompting_pipeline( + user_query="Test LLM failure handling", + context="Context for failure test", + llm_fn=llm_fn_failure, + reprompting_config=config, + user_instructions=[] + ) + +def test_invalid_llm_fn(): + """Should raise TypeError when LLM function is None.""" + config = get_config() + with pytest.raises(TypeError): + run_reprompting_pipeline( + user_query="Test invalid LLM fn", + context="Context for failure test", + llm_fn=None, + reprompting_config=config, + user_instructions=[] + ) + +def test_invalid_return_value(): + """Should raise TypeError when the LLM returns a 
non-string value.""" + config = get_config() + with pytest.raises(TypeError, match="LLM returned invalid type int, expected str."): + run_reprompting_pipeline( + user_query="Test invalid return type", + context="Context for type error", + llm_fn=llm_fn_incorrect_return_value, + reprompting_config=config, + user_instructions=[] + ) + +def test_empty_query(): + """Empty query should raise a ValueError.""" + config = get_config() + with pytest.raises(ValueError, match="user_query must be a non-empty string"): + run_reprompting_pipeline( + user_query="", + context="", + llm_fn=my_llm, + reprompting_config=config, + user_instructions=[] + ) + +def test_invalid_api_key(): + """Should fail due to invalid AIMon API key.""" + config = get_config_with_invalid_aimon_api_key() + with pytest.raises(aimon.AuthenticationError): + run_reprompting_pipeline( + user_query="Testing with invalid AIMon API key", + context="Context for invalid key test", + llm_fn=my_llm, + reprompting_config=config, + user_instructions=[] + )
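+
+# --- MANUAL SMOKE RUN (illustrative sketch, not collected by pytest) ---
+# A minimal sketch of calling the pipeline directly with the mocks defined
+# above. It reuses get_config() and my_llm(); the query, context, and
+# instruction strings are hypothetical placeholders. Requires a valid
+# AIMON_API_KEY in the environment.
+if __name__ == "__main__":
+    config = get_config()
+    result = run_reprompting_pipeline(
+        user_query="Summarize the data retention policy in two sentences.",
+        context="[SECTION] Example Policy [SECTION] Deleted files are purged after 30 days.",
+        llm_fn=my_llm,
+        reprompting_config=config,
+        user_instructions=["Keep the tone professionally neutral"],
+    )
+    print(result["best_response"])  # always present in the pipeline's return dict
+    print(result.get("summary"))    # present here because get_config() sets return_aimon_summary=True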
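+
+# --- FIXTURE SKETCH (hypothetical; for illustration only) ---
+# The tests in tests/test_reprompting.py consume pytest fixtures such as
+# base_config, config_low_latency, and config_iteration_limit. A conftest.py
+# along the following lines would supply them; the field values are
+# assumptions inferred from the test names and printed labels (100ms and
+# 5000ms latency limits, max_iterations=1, telemetry disabled), not the
+# project's actual conftest.
+#
+# @pytest.fixture
+# def base_config():
+#     # test_with_system_prompt asserts that "telemetry" and "summary" are
+#     # present, so both flags must be enabled here.
+#     return RepromptingConfig(aimon_api_key=AIMON_API_KEY,
+#                              return_telemetry=True, return_aimon_summary=True)
+#
+# @pytest.fixture
+# def config_low_latency():
+#     return RepromptingConfig(aimon_api_key=AIMON_API_KEY, latency_limit_ms=100)
+#
+# @pytest.fixture
+# def config_high_latency():
+#     return RepromptingConfig(aimon_api_key=AIMON_API_KEY, latency_limit_ms=5000)
+#
+# @pytest.fixture
+# def config_iteration_limit():
+#     return RepromptingConfig(aimon_api_key=AIMON_API_KEY, max_iterations=1)
+#
+# @pytest.fixture
+# def config_without_telemetry():
+#     return RepromptingConfig(aimon_api_key=AIMON_API_KEY,
+#                              return_telemetry=False, return_aimon_summary=False)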