diff --git a/adalflow/adalflow/components/agent/agent.py b/adalflow/adalflow/components/agent/agent.py
index 3a939f118..9cfb9ae61 100644
--- a/adalflow/adalflow/components/agent/agent.py
+++ b/adalflow/adalflow/components/agent/agent.py
@@ -132,7 +132,7 @@ def create_default_planner(
     cache_path: Optional[str] = None,
     use_cache: Optional[bool] = False,
     # default agent parameters
-    max_steps: Optional[int] = 10,
+    max_steps: Optional[int] = 3,
     is_thinking_model: Optional[bool] = False,
     answer_data_type: Optional[Type[T]] = str,
     **kwargs,
@@ -259,8 +259,8 @@ def __init__(
     use_cache: Optional[bool] = True,
     # default agent parameters
     answer_data_type: Optional[Type[T]] = str,  # the data type of the final answer
-    max_steps: Optional[int] = 10,
-    is_thinking_model: Optional[bool] = False,  # when thinking model turned on, it disables the CoT field in the output
+    max_steps: Optional[int] = DEFAULT_MAX_STEPS,
+    is_thinking_model: Optional[bool] = False,  # support thinking model in agent
     # for fully customize the agent
     tool_manager: Optional[
         ToolManager
diff --git a/adalflow/adalflow/components/agent/react.py b/adalflow/adalflow/components/agent/react.py
index c5e3d5977..d8295981b 100644
--- a/adalflow/adalflow/components/agent/react.py
+++ b/adalflow/adalflow/components/agent/react.py
@@ -559,7 +559,7 @@ def _run_one_step(
         )
 
         log.debug(
-            f"Running step {step} with prompt: {self.planner.prompt(**prompt_kwargs)}"
+            f"Running step {step} with prompt: {self.planner.get_prompt(**prompt_kwargs)}"
         )
 
         try:
@@ -682,7 +682,8 @@ def _get_answer(
             step_history=step_history,
             id=last_step.data_id,
             react_agent_task_desc=self.planner.prompt_kwargs[
-                "react_agent_task_desc"
+                # "react_agent_task_desc"
+                "task_desc"
             ],
         )
 
@@ -720,7 +721,7 @@ def _is_step_output_last_step(self, step_output: StepOutput) -> bool:
     def bicall(
         self,
         input: str,  # open up to the external
-        promt_kwargs: Optional[Dict] = {},
+        prompt_kwargs: Optional[Dict] = {},
         model_kwargs: Optional[Dict] = {},
         id: Optional[str] = None,
     ) -> Union["Parameter", ReActOutput]:
@@ -729,7 +730,7 @@ def bicall(
 
         # set up the prompts
         prompt_kwargs = {
-            **promt_kwargs,
+            **prompt_kwargs,
             "input_str": input,
         }
diff --git a/adalflow/adalflow/components/agent/runner.py b/adalflow/adalflow/components/agent/runner.py
index e6aa51818..b6d1e3067 100644
--- a/adalflow/adalflow/components/agent/runner.py
+++ b/adalflow/adalflow/components/agent/runner.py
@@ -20,10 +20,12 @@
 
 from typing_extensions import TypeAlias
 
-from adalflow.optim.parameter import Parameter
+from adalflow.optim.parameter import Parameter, ParameterType, OutputParameter
 from adalflow.utils import printc
 from adalflow.core.component import Component
 from adalflow.components.agent.agent import Agent
+from adalflow.optim.function import BackwardContext
+
 
 from adalflow.core.types import (
     GeneratorOutput,
@@ -53,6 +55,7 @@
     response_span,
     step_span,
 )
+from adalflow.optim.grad_component import GradComponent
 
 __all__ = ["Runner"]
 
@@ -68,9 +71,26 @@
 ]  # Replace with your actual Adalflow dataclass type if available
 
 
+class CombineStepHistoryAndRunnerResult(GradComponent):
+    def __init__(self):
+        super().__init__(desc="Extract the final answer from the step history.")
+
+    def call(
+        self,
+        step_history: List[StepOutput],
+        runner_result: RunnerResult,
+        task_desc: str,  # skip connection
+        id: Optional[str] = None,
+    ) -> str:
+        if not runner_result:
+            return ""
+        # answer = step_history[-1].observation
+        answer = runner_result.answer
+        return answer
+
+
 # The runner will create tool call request, add a unique call id.
 # TODO: move this to repo adalflow/agent
-class Runner(Component):
+class Runner(GradComponent):
     """Executes Agent instances with multi-step iterative planning and tool execution.
 
     The Runner orchestrates the execution of an Agent through multiple reasoning and action
@@ -106,6 +126,7 @@ def __init__(
         max_steps: Optional[int] = None,  # this will overwrite the agent's max_steps
         permission_manager: Optional[PermissionManager] = None,
         conversation_memory: Optional[ConversationMemory] = None,
+        training: Optional[bool] = False,
         **kwargs,
     ) -> None:
         """Initialize runner with an agent and configuration.
@@ -118,7 +139,9 @@ def __init__(
             permission_manager: Optional permission manager for tool approval
             conversation_memory: Optional conversation memory
+            training: If True, __call__ dispatches to the trainable forward path
         """
-        super().__init__(**kwargs)
+        Component.__init__(self, **kwargs)
+        GradComponent.__init__(self, desc="Generate a response using LLM model.", **kwargs)
+        # creates a backward engine if it is not passed in the kwargs
         self.agent = agent
         self.tool_manager = agent.tool_manager
         self.permission_manager = permission_manager
@@ -137,6 +160,7 @@ def __init__(
         else:
             # overwrite the agent's max_steps
             self.agent.max_steps = max_steps
+
         self.answer_data_type = agent.answer_data_type or str
 
         self.step_history: List[StepOutput] = []
@@ -153,6 +177,11 @@ def __init__(
         self._cancel_callbacks = []
         self._current_task = None  # Track the current running task
         self._current_streaming_result = None  # Track the current streaming result
+        self.training = training
+
+        # combine step history
+        self.combine_step_history_and_runner_result = CombineStepHistoryAndRunnerResult()
+
         # support thinking model
         self.is_thinking_model = agent.is_thinking_model if hasattr(agent, 'is_thinking_model') else False
@@ -184,8 +213,6 @@ def set_permission_manager(
         if permission_manager is not None:
             permission_manager.set_tool_manager(self.tool_manager)
 
-
-
     def is_cancelled(self) -> bool:
         """Check if execution has been cancelled."""
         return self._cancelled
@@ -207,15 +234,19 @@ async def cancel(self) -> None:
         self._cancelled = True
 
         # Try to emit a test event if we have a streaming result
-        if hasattr(self, '_current_streaming_result') and self._current_streaming_result:
+        if (
+            hasattr(self, "_current_streaming_result")
+            and self._current_streaming_result
+        ):
             try:
                 cancel_received_event = RunItemStreamEvent(
                     name="runner.cancel_received",
                     item=FinalOutputItem(
                         data={
-                            "status": "cancel_received",
-                            "message": "Cancel request received",
-                        })
+                            "status": "cancel_received",
+                            "message": "Cancel request received",
+                        }
+                    ),
                 )
                 self._current_streaming_result.put_nowait(cancel_received_event)
                 log.info("Emitted cancel_received event")
@@ -235,10 +266,7 @@ async def _wait_for_cancellation(self):
         if self._current_task:
             try:
                 # Wait up to 1 second for task to cancel gracefully
-                await asyncio.wait_for(
-                    self._current_task,
-                    timeout=1.0
-                )
+                await asyncio.wait_for(self._current_task, timeout=1.0)
             except (asyncio.TimeoutError, asyncio.CancelledError):
                 # Task didn't cancel in time or was cancelled - that's ok
                 pass
@@ -256,16 +284,69 @@ def _get_final_answer(self, function: Function) -> Any:
             return self._process_data(function._answer)
         return None
 
-
-    def _create_runner_result(self, answer: Any, step_history, error: Optional[str] = None, ) -> RunnerResult:
+    def _create_runner_result(
+        self,
+        answer: Any,
+        step_history,
+        error: Optional[str] = None,
+        id: Optional[str] = None,
+    ) -> RunnerResult:
         """Create a RunnerResult object with the final answer and error."""
         return RunnerResult(
             answer=answer,
             step_history=step_history.copy(),
             error=error,
+            id=id,
             # ctx=self.ctx,
         )
 
-    def _create_execution_complete_stream_event(self, streaming_result: RunnerStreamingResult, final_output_item: FinalOutputItem):
+
+    def _create_trainable_runner_result(
+        self,
+        runner_result: RunnerResult,
+        name: str,
+        previous_output: Parameter,
+        prompt_kwargs: Dict[str, Any],
+        id: Optional[str] = None,
+    ) -> OutputParameter:
+        """Helper function to create a trainable OutputParameter with proper grad_fn setup.
+
+        Args:
+            runner_result: The RunnerResult data
+            name: Name for the parameter
+            previous_output: The predecessor parameter to link to
+            prompt_kwargs: Original prompt arguments for gradient computation
+            id: Optional identifier
+
+        Returns:
+            OutputParameter with grad_fn properly configured
+        """
+        runner_result_parameter = OutputParameter(
+            name=name,
+            data=runner_result,
+            param_type=ParameterType.OUTPUT,
+            requires_opt=True,
+        )
+
+        runner_result_parameter.set_predecessors([previous_output])
+
+        runner_result_parameter.set_grad_fn(
+            BackwardContext(
+                backward_fn=self.backward,
+                backward_engine=self.backward_engine,
+                response=runner_result_parameter,
+                prompt_kwargs=prompt_kwargs,
+                template=getattr(self.agent.planner, 'template', None),
+                prompt_str=self.agent.planner.get_prompt(**prompt_kwargs),
+                disable_backward_engine=self._disable_backward_engine,
+                id=id,
+            )
+        )
+
+        return runner_result_parameter
+
+    def _create_execution_complete_stream_event(
+        self, streaming_result: RunnerStreamingResult, final_output_item: FinalOutputItem
+    ):
         """Complete the streaming execution by adding a sentinel."""
         final_output_event = RunItemStreamEvent(
             name="agent.execution_complete",
@@ -292,8 +373,14 @@ def _add_assistant_response_to_memory(self, final_output_item: FinalOutputItem):
             )
         )
 
-    def create_response_span(self, runner_result, step_count: int, streaming_result: RunnerStreamingResult, runner_span_instance, workflow_status: str = "stream_completed"):
-
+    def create_response_span(
+        self,
+        runner_result,
+        step_count: int,
+        streaming_result: RunnerStreamingResult,
+        runner_span_instance,
+        workflow_status: str = "stream_completed",
+    ):
         runner_span_instance.span_data.update_attributes(
             {
                 "steps_executed": step_count + 1,
@@ -316,15 +403,13 @@ def create_response_span(self, runner_result, step_count: int, streaming_result:
         ):
             pass
 
-
-
-
     async def _process_stream_final_step(
         self,
         answer: Any,
         step_count: int,
         streaming_result,
-        runner_span_instance
+        runner_span_instance,
+        id: Optional[str] = None,
     ) -> FinalOutputItem:
         """Process the final step and trace it."""
         # processed_data = self._get_final_answer(function)
@@ -335,6 +420,7 @@ async def _process_stream_final_step(
         runner_result = self._create_runner_result(
             answer=answer,
             step_history=self.step_history,
+            id=id,
         )
 
         # Update runner span with final results
@@ -404,7 +490,9 @@ def _process_data(
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON in data: {e}")
             if not isinstance(data, dict):
-                raise ValueError(f"Expected dict after JSON parsing, got {type(data)}")
+                raise ValueError(
+                    f"Expected dict after JSON parsing, got {type(data)}"
+                )
             log.info(
                 f"initial answer after being evaluated using json: {data}, type: {type(data)}"
             )
@@ -461,7 +549,7 @@ def call(
         model_kwargs: Optional[Dict[str, Any]] = None,
         use_cache: Optional[bool] = None,
         id: Optional[str] = None,  # global run id
-    ) -> RunnerResult:
+    ) -> Union[RunnerResult, OutputParameter]:
         """Execute the planner synchronously for multiple steps with function calling support.
 
         At the last step the action should be set to "finish" instead which terminates the sequence
@@ -565,6 +653,7 @@ def call(
                         last_output = RunnerResult(
                             answer=processed_data,
                             step_history=self.step_history.copy(),
+                            id=id,
                             # ctx=self.ctx,
                         )
 
@@ -696,8 +785,182 @@ def call(
             answer=current_error or f"No output generated after {step_count} steps (max_steps: {self.max_steps})",
             step_history=self.step_history.copy(),
             error=current_error,
+            id=id,
         )
 
+    # trainable version of Runner call
+    def forward(
+        self,
+        prompt_kwargs: Dict[str, Any],
+        model_kwargs: Optional[Dict[str, Any]] = None,
+        use_cache: Optional[bool] = None,
+        id: Optional[str] = None,
+    ) -> Parameter:
+        """Trainable version of call that chains generator outputs and returns a trainable Parameter.
+
+        Each generator output becomes a trainable predecessor for the next one, and the final
+        result is wrapped in a trainable Parameter.
+        """
+        # Initialize tracking variables
+        self.agent.planner.training = True
+
+        previous_output = None
+        step_count = 0
+        self.step_history = []
+
+        # Set up the initial prompt
+        prompt_kwargs = prompt_kwargs.copy() if prompt_kwargs else {}
+        prompt_kwargs["step_history"] = self.step_history
+
+        if self.use_conversation_memory:
+            self.conversation_memory.reset_pending_query()
+            prompt_kwargs["chat_history_str"] = self.conversation_memory()
+            query_metadata = {"context_str": prompt_kwargs.get("context_str", None)}
+            self.conversation_memory.add_user_query(
+                UserQuery(
+                    query_str=prompt_kwargs.get("input_str", None),
+                    metadata=query_metadata,
+                )
+            )
+
+        prompt_kwargs["max_steps"] = self.max_steps
+        model_kwargs = model_kwargs.copy() if model_kwargs else {}
+
+        # Main training loop
+        while step_count < self.max_steps:
+            try:
+                # Get the next generator output
+                current_output = self.agent.planner.forward(
+                    prompt_kwargs=prompt_kwargs,
+                    model_kwargs=model_kwargs,
+                    # use_cache=use_cache,  # TODO: forward has no use_cache argument
+                    id=id,
+                    # predecessor=previous_output  # Chain to previous output
+                )
+                printc(
+                    f"agent planner prompt call: {self.agent.planner.get_prompt(**prompt_kwargs)}"
+                )
+
+                # If this is the first step, we need to create a trainable parameter
+                trainable_output = current_output
+                trainable_output.name = f"step_{step_count}"
+                trainable_output.requires_opt = True
+                if previous_output is not None:
+                    # Chain to previous trainable output
+                    # TODO: make cleaner
+                    if trainable_output.predecessors is None:
+                        trainable_output.predecessors = set()
+                    trainable_output.predecessors.add(previous_output)
+
+                previous_output = trainable_output
+
+                generator_output = current_output.data
+
+                printc(f"planner output: {generator_output}")
+
+                # Process the function call if needed
+                if hasattr(generator_output, 'data') and isinstance(generator_output.data, Function):
+                    function = generator_output.data
+                    function.id = str(uuid.uuid4())
+
+                    # Handle final step
+                    if self._check_last_step(function):
+                        printc(f"Function answer: {function._answer}")
+                        processed_data = self._process_data(function._answer)
+                        printc(f"Final processed_data: {processed_data}")
+                        runner_result = RunnerResult(
+                            answer=processed_data,
+                            step_history=self.step_history.copy(),
+                            id=id,
+                        )
+                        return self._create_trainable_runner_result(
+                            runner_result=runner_result,
+                            name="Runner Result",
+                            previous_output=previous_output,
+                            prompt_kwargs=prompt_kwargs,
+                            id=id,
+                        )
+
+                        # return self.combine_step_history_and_runner_result(
+                        #     step_history=self.step_history,
+                        #     runner_result=runner_result,
+                        #     id=id,
+                        #     task_desc=self.agent.planner.prompt_kwargs["task_desc"],
+                        # )
+
+                    # Execute the function and update context
+                    function_results = self._tool_execute_sync(function)
+                    step_output = StepOutput(
+                        step=step_count,
+                        action=function,
+                        function=function,
+                        observation=function_results.output.observation if hasattr(function_results.output, 'observation') else function_results.output,
+                    )
+
+                    self.step_history.append(step_output)
+                    prompt_kwargs["step_history"] = self.step_history
+
+                # Update for next iteration
+                step_count += 1
+
+            except Exception as e:
+                error_msg = f"Error in step {step_count}: {str(e)}"
+                log.error(error_msg)
+                runner_result = RunnerResult(
+                    answer=error_msg,
+                    step_history=self.step_history.copy(),
+                    error=error_msg,
+                    id=id,
+                )
+                return self._create_trainable_runner_result(
+                    runner_result=runner_result,
+                    name="Runner Error",
+                    previous_output=previous_output,
+                    prompt_kwargs=prompt_kwargs,
+                    id=id,
+                )
+                # return self.combine_step_history_and_runner_result(
+                #     step_history=self.step_history,
+                #     runner_result=runner_result,
+                #     id=id,
+                #     task_desc=self.agent.planner.prompt_kwargs["task_desc"],
+                # )
+
+        # If we exit the loop without a final result
+        runner_result = RunnerResult(
+            answer=f"Max steps ({self.max_steps}) reached without completion",
+            step_history=self.step_history.copy(),
+            error="Max steps reached",
+            id=id,
+        )
+
+        runner_result_parameter = self._create_trainable_runner_result(
+            runner_result=runner_result,
+            name="Runner Incomplete",
+            previous_output=previous_output,
+            prompt_kwargs=prompt_kwargs,
+            id=id,
+        )
+
+        return runner_result_parameter
+
+        # return self.combine_step_history_and_runner_result(
+        #     step_history=step_history,
+        #     runner_result=runner_result,
+        #     id=id,
+        #     task_desc=self.agent.planner.prompt_kwargs["task_desc"],
+        # )
+
+    def __call__(self, *args, **kwargs) -> Union[Parameter, RunnerResult]:
+        if self.training:
+            log.debug("Training mode")
+            return self.forward(*args, **kwargs)
+        else:
+            log.debug("Inference mode")
+            return self.call(*args, **kwargs)
+
     def _tool_execute_sync(
         self,
         func: Function,
@@ -852,6 +1115,7 @@ async def acall(
                         last_output = RunnerResult(
                             answer=processed_data,
                             step_history=self.step_history.copy(),
+                            id=id,
                             # ctx=self.ctx,
                         )
 
@@ -989,6 +1253,7 @@ async def acall(
             answer=current_error or f"No output generated after {step_count} steps (max_steps: {self.max_steps})",
             step_history=self.step_history.copy(),
             error=current_error,
+            id=id,
         )
 
     def astream(
@@ -1226,6 +1491,7 @@ async def impl_astream(
                             step_count=step_count,
                             streaming_result=streaming_result,
                             runner_span_instance=runner_span_instance,
+                            id=id,
                         )
                         stop_the_loop = True
                         workflow_status = "stream_completed"
@@ -1375,6 +1641,7 @@ async def impl_astream(
                 answer=f"No output generated after {step_count} steps (max_steps: {self.max_steps})",
                 error=current_error,
                 step_history=self.step_history.copy(),
+                id=id,
             )
 
             final_output_item = FinalOutputItem(data=runner_result)
@@ -1397,6 +1664,7 @@ async def impl_astream(
                 answer=final_output_item.data.answer if final_output_item.data else None,
                 step_history=self.step_history.copy(),
                 error=current_error,
+                id=id,
             )
 
             self._create_execution_complete_stream_event(
diff --git a/adalflow/adalflow/components/agent/runner_trainer.py b/adalflow/adalflow/components/agent/runner_trainer.py
new file mode 100644
index 000000000..12f41be43
--- /dev/null
+++ b/adalflow/adalflow/components/agent/runner_trainer.py
@@ -0,0 +1,219 @@
+from typing import Any, Callable, Dict, Optional, Tuple, List
+import adalflow as adal
+from adalflow.datasets.types import GSM8KData as Example
+from adalflow.datasets.gsm8k import GSM8K
+from adalflow.eval.answer_match_acc import AnswerMatchAcc
+from adalflow.core.types import RunnerResult
+from adalflow.optim.optimizer import Optimizer
+from adalflow.components.agent.react import ReActAgent
+from adalflow.optim.parameter import Parameter, ParameterType
+from adalflow.core.func_tool import FunctionTool
+from adalflow.utils import printc
+
+
+def load_datasets():
+    train_data = GSM8K(split="train", size=100)
+    val_data = GSM8K(split="val", size=50)
+    test_data = GSM8K(split="test", size=100)
+    return train_data, val_data, test_data
+
+
+def default_eval_fn(y: str, y_gt: str) -> float:
+    """Default evaluation function that performs an exact string match."""
+    return 1.0 if str(y).strip() == str(y_gt).strip() else 0.0
+
+
+def default_loss_fn_factory(eval_fn: Callable = None):
+    """Factory function to create the default loss function."""
+    if eval_fn is None:
+        eval_fn = default_eval_fn
+    return adal.EvalFnToTextLoss(
+        eval_fn=eval_fn,
+        eval_fn_desc="Default evaluation: 1 if str(y) == str(y_gt) else 0",
+    )
+
+
+class RunnerTrainer(adal.AdalComponent):
+    """Generic trainer that accepts a Runner task and allows custom eval_fn and loss_fn."""
+
+    def __init__(
+        self,
+        runner: adal.Component,
+        eval_fn: Optional[Callable] = None,
+        loss_fn: Optional[Callable] = None,
+        backward_engine_model_config: Optional[Dict] = None,
+        teacher_model_config: Optional[Dict] = None,
+        text_optimizer_model_config: Optional[Dict] = None,
+        original_react_agent: bool = False,
+    ):
+        # Use provided eval_fn or create default
+        if eval_fn is None:
+            eval_fn = default_eval_fn
+
+        # Use provided loss_fn or create default
+        if loss_fn is None:
+            loss_fn = default_loss_fn_factory(eval_fn)
+
+        super().__init__(task=runner, eval_fn=eval_fn, loss_fn=loss_fn)
+
+        self.backward_engine_model_config = backward_engine_model_config
+        self.teacher_model_config = teacher_model_config
+        self.text_optimizer_model_config = text_optimizer_model_config
+        self.original_react_agent = original_react_agent
+
+    def prepare_task(self, sample: Example) -> Tuple[Callable, Dict[str, Any]]:
+        # Runner.call expects a prompt_kwargs parameter, not direct keyword args.
+        # prepare_task should not call forward directly; __call__ dispatches to
+        # forward in training mode.
+        # return self.task.forward, {"prompt_kwargs": {"input_str": sample.question}, "id": sample.id}
+        if self.original_react_agent:
+            return self.task.__call__, {"input": sample.question, "id": sample.id}
+        return self.task.__call__, {"prompt_kwargs": {"input_str": sample.question}, "id": sample.id}
+
+    def prepare_eval(
+        self, sample: Example, y_pred: RunnerResult
+    ) -> Tuple[float, Dict[str, Any]]:
+        y_label = ""
+
+        if not self.original_react_agent and y_pred is not None and y_pred.answer is not None:
+            y_label = y_pred.answer
+        elif self.original_react_agent:
+            y_label = y_pred
+        return self.eval_fn, {"y": y_label, "y_gt": sample.answer}
+
+    def prepare_loss(
+        self, sample: Example, pred: adal.Parameter
+    ) -> Tuple[Callable, Dict[str, Any]]:
+        print("pred", pred)
+        y_gt = adal.Parameter(
+            name="y_gt",
+            data=sample.answer,
+            eval_input=sample.answer,
+            requires_opt=False,
+        )
+
+        if self.original_react_agent:
+            pred.eval_input = pred.data
+        else:
+            pred.eval_input = pred.data.answer
+
+        return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}, "id": sample.id}
+
+
+def train_original_react():
+    from adalflow.utils import setup_env
+
+    setup_env()
+    train_data, val_data, test_data = load_datasets()
+
+    # Create an Anthropic client configuration instead of the GPT-3.5 one
+    anthropic_config = {
+        "model_client": adal.AnthropicAPIClient(),
+        "model_kwargs": {
+            "model": "claude-3-5-sonnet-20241022",
+            # "max_tokens": 2000,
+            "temperature": 0.0,
+        }
+    }
+
+    # LLM used as a tool, sharing the same Anthropic config
+    llm_tool = adal.Generator(**anthropic_config)
+
+    def llm_as_tool(input: str, id: Optional[str] = None) -> str:
+        """Used as a calculator tool."""
+        printc(f"llm_as_tool: {input}", color="yellow")
+
+        return llm_tool(prompt_kwargs={"input_str": input}, id=id)
+
+    react_agent = ReActAgent(
+        tools=[FunctionTool(llm_as_tool)],
+        max_steps=6,
+        add_llm_as_fallback=True,
+        **anthropic_config
+    )
+
+    # Create specific eval_fn and loss_fn like in the GSM8K train.py
+    eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
+    loss_fn = adal.EvalFnToTextLoss(
+        eval_fn=eval_fn,
+        eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
+    )
+
+    adal_component = RunnerTrainer(
+        runner=react_agent,
+        eval_fn=eval_fn,
+        loss_fn=loss_fn,
+        backward_engine_model_config=anthropic_config,
+        text_optimizer_model_config=anthropic_config,
+        original_react_agent=True,
+    )
+    trainer = adal.Trainer(
+        adaltask=adal_component,
+        strategy="random",
+        max_steps=10,
+        text_optimizers_config_kwargs={"max_past_history": 5},
+    )
+    trainer.fit(
+        train_dataset=train_data,
+        val_dataset=val_data,
+        test_dataset=test_data,
+        debug=False,
+    )
+
+
+def train():
+    from adalflow.utils import setup_env
+
+    setup_env()
+    train_data, val_data, test_data = load_datasets()
+
+    # Create an Anthropic client configuration instead of the GPT-3.5 one
+    anthropic_config = {
+        "model_client": adal.AnthropicAPIClient(),
+        "model_kwargs": {
+            "model": "claude-3-5-sonnet-20241022",
+            # "max_tokens": 2000,
+            "temperature": 0.0,
+        }
+    }
+
+    # Create the Agent first, then the Runner
+    agent = adal.Agent(
+        name="gsm8k_agent",
+        add_llm_as_fallback=True,
+        max_steps=6,
+        **anthropic_config
+    )
+
+    runner = adal.Runner(agent=agent, training=True)
+
+    # Create specific eval_fn and loss_fn like in the GSM8K train.py
+    eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
+    loss_fn = adal.EvalFnToTextLoss(
+        eval_fn=eval_fn,
+        eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
+    )
+
+    adal_component = RunnerTrainer(
+        runner=runner,
+        eval_fn=eval_fn,
+        loss_fn=loss_fn,
+        backward_engine_model_config=anthropic_config,
+        text_optimizer_model_config=anthropic_config,
+    )
+    trainer = adal.Trainer(
+        adaltask=adal_component,
+        strategy="random",
+        max_steps=10,
+        # resume_from_ckpt="./Users/jinhakim/.adalflow/ckpt/RunnerTrainer/random_max_steps_5_2bd11_run_1.json",
+        text_optimizers_config_kwargs={"max_past_history": 5},
+    )
+    trainer.fit(
+        train_dataset=train_data,
+        val_dataset=val_data,
+        test_dataset=test_data,
+        debug=False,
+        # resume_from_ckpt="./Users/jinhakim/.adalflow/ckpt/RunnerTrainer/random_max_steps_2_cc5fb_run_1.json",
+    )
+
+    # feedback, visualization
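+
+
+def example_single_query():
+    """Minimal usage sketch (illustrative; not part of the original training
+    pipeline): run a single question through a Runner in inference mode and
+    read the answer. Assumes an ANTHROPIC_API_KEY is available to setup_env();
+    the agent name and question are placeholders.
+    """
+    from adalflow.utils import setup_env
+
+    setup_env()
+    agent = adal.Agent(
+        name="example_agent",
+        add_llm_as_fallback=True,
+        max_steps=4,
+        model_client=adal.AnthropicAPIClient(),
+        model_kwargs={"model": "claude-3-5-sonnet-20241022", "temperature": 0.0},
+    )
+    # training=False makes Runner.__call__ dispatch to call() (inference)
+    # rather than the trainable forward() path
+    runner = adal.Runner(agent=agent, training=False)
+    result = runner.call(prompt_kwargs={"input_str": "What is 7 * 8?"}, id="example-1")
+    print(result.answer)
+
+
+if __name__ == "__main__":
+    train()
+    # train_original_react()
\ No newline at end of file
diff --git a/adalflow/adalflow/core/functional.py b/adalflow/adalflow/core/functional.py
index 426a54231..9132ce311 100644
--- a/adalflow/adalflow/core/functional.py
+++ b/adalflow/adalflow/core/functional.py
@@ -84,8 +84,17 @@ def _asdict_inner(obj, dict_factory, exclude):
             )
             for k, v in obj.items()
         )
+    elif isinstance(obj, set):
+        # Handle sets specifically - preserve them as sets
+        return type(obj)(_asdict_inner(v, dict_factory, exclude) for v in obj)
     else:
-        return obj
+        # Check if the object is JSON serializable
+        try:
+            json.dumps(obj)
+            return obj
+        except Exception:
+            # If not JSON serializable, convert to string representation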
+            return str(obj)
 
         # return deepcopy(obj)
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
index f2edc205f..f4bbbb2a5 100644
--- a/adalflow/adalflow/core/generator.py
+++ b/adalflow/adalflow/core/generator.py
@@ -162,6 +162,7 @@ def __init__(
 
         CachedEngine.__init__(self, cache_path=self.cache_path)
         Component.__init__(self)
+        # creates a backward engine if none is passed
         GradComponent.__init__(self, desc="Generate a response using LLM model.")
         CallbackManager.__init__(self)
 
@@ -739,7 +740,7 @@ def data_to_prompt_map_fn(data: Parameter) -> str:
         # **** end of the special to the generator ****
 
         # if not self.backward_engine:
-        #     # self.set_backward_engine()
+        #     self.set_backward_engine()
         #     log.debug(f"Backward engine: {self.backward_engine}")
 
         # attach a funtion to compute gradient for predecessors
@@ -778,7 +779,7 @@ def backward(
         log.debug(
             f"backward pass setup: {backward_pass_setup}, name: {self.name}",
             color="red",
-        )
+        )
 
         children_params = response.predecessors
         is_intermediate_node = True
diff --git a/adalflow/adalflow/core/types.py b/adalflow/adalflow/core/types.py
index 024b3c691..4e81bb66f 100644
--- a/adalflow/adalflow/core/types.py
+++ b/adalflow/adalflow/core/types.py
@@ -1436,6 +1436,10 @@ class RunnerResult:
         metadata={"desc": "The context of the execution"},
         default=None,
     )
+    id: Optional[str] = field(
+        metadata={"desc": "The id of the execution"},
+        default=None,
+    )
 
 
 @dataclass
diff --git a/adalflow/adalflow/datasets/hotpot_qa.py b/adalflow/adalflow/datasets/hotpot_qa.py
index afec6cff1..c809ee72e 100644
--- a/adalflow/adalflow/datasets/hotpot_qa.py
+++ b/adalflow/adalflow/datasets/hotpot_qa.py
@@ -11,7 +11,6 @@
 from adalflow.core.base_data_class import DataClass
 from adalflow.datasets.types import HotPotQAData
 
-
 class HotPotQA(Dataset):
     def __init__(
         self,
diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py
index 7fb68ddd4..1275a3185 100644
--- a/adalflow/adalflow/optim/parameter.py
+++ b/adalflow/adalflow/optim/parameter.py
@@ -865,6 +865,8 @@ def draw_interactive_html_graph(
 
         # Add nodes to the graph
         node_ids = set()
+        if nodes is None:
+            nodes = []
         for node in nodes:
             self.generate_node_html(node, output_dir=filepath)
diff --git a/adalflow/tests/test_runner.py b/adalflow/tests/test_runner.py
index 39f24051a..58d457aaf 100644
--- a/adalflow/tests/test_runner.py
+++ b/adalflow/tests/test_runner.py
@@ -816,6 +816,329 @@ def test_conversation_memory_clear_streaming(self):
         asyncio.run(self._test_conversation_memory_clear_streaming())
 
 
+class TestRunnerForward(unittest.TestCase):
+    """Tests for the Runner.forward method (trainable mode)."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.runner = Runner(
+            agent=DummyAgent(
+                planner=None, answer_data_type=None, tool_manager=MockToolManager()
+            )
+        )
+
+    def test_forward_single_step_final_answer(self):
+        """Test forward method with single step that produces final answer."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="forward_answer")
+
+        class TrainablePlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                # Return a Parameter wrapping GeneratorOutput
+                param = Parameter(
+                    name="planner_output",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=TrainablePlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with RunnerResult
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertEqual(result.data.answer, "forward_answer")
+        self.assertTrue(result.requires_opt)
+
+    def test_forward_multi_step_execution(self):
+        """Test forward method with multiple steps before final answer."""
+        from adalflow.optim.parameter import Parameter
+
+        fn1 = DummyFunction(name="search", _is_answer_final=False)
+        fn2 = DummyFunction(name="answer_output", _is_answer_final=True, _answer="multi_step_answer")
+
+        class MultiStepTrainablePlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                if self.call_count == 1:
+                    data = GeneratorOutput(data=fn1)
+                else:
+                    data = GeneratorOutput(data=fn2)
+
+                param = Parameter(
+                    name=f"planner_output_{self.call_count}",
+                    data=data,
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=MultiStepTrainablePlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with RunnerResult
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertEqual(result.data.answer, "multi_step_answer")
+        self.assertEqual(len(runner.step_history), 1)  # Only non-final step in history
+
+    def test_forward_parameter_chaining(self):
+        """Test that forward method properly chains parameters."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="chained_answer")
+
+        class ChainingPlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                param = Parameter(
+                    name=f"step_{self.call_count}",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=ChainingPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+
+        # Should have gradient function set up
+        self.assertIsNotNone(result.grad_fn)
+        self.assertTrue(result.requires_opt)
+
+    def test_forward_error_handling(self):
+        """Test forward method handles errors gracefully."""
+        from adalflow.optim.parameter import Parameter
+
+        class ErrorPlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                raise ValueError("Planning error")
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=ErrorPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with error in RunnerResult
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertIn("Error in step 0", result.data.answer)
+        self.assertIsNotNone(result.data.error)
+
+    def test_forward_max_steps_reached(self):
+        """Test forward method when max steps are reached without final answer."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="continue", _is_answer_final=False)
+
+        class ContinuousPlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                param = Parameter(
+                    name=f"step_{self.call_count}",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=ContinuousPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager(),
+            max_steps=2
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with incomplete message
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertIn("Max steps (2) reached", result.data.answer)
+        self.assertEqual(len(runner.step_history), 2)
+
+    def test_forward_training_mode_flag(self):
+        """Test that forward method sets training mode on planner."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="training_test")
+
+        class TrainingAwarePlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                # Should be called in training mode
+                assert self.training == True, "Planner should be in training mode"
+                param = Parameter(
+                    name="planner_output",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=TrainingAwarePlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        # This should set planner.training = True and not raise assertion error
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+
+    def test_forward_with_conversation_memory(self):
+        """Test forward method with conversation memory."""
+        from adalflow.optim.parameter import Parameter
+        from adalflow.components.memory.memory import ConversationMemory
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="memory_test")
+        captured_prompt_kwargs = []
+
+        class MemoryPlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                captured_prompt_kwargs.append(prompt_kwargs.copy())
+                param = Parameter(
+                    name="planner_output",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        memory = ConversationMemory()
+        agent = DummyAgent(
+            planner=MemoryPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent, conversation_memory=memory)
+
+        result = runner.forward(prompt_kwargs={"input_str": "Hello"})
+
+        # Should have chat_history_str in prompt_kwargs
+        self.assertEqual(len(captured_prompt_kwargs), 1)
+        self.assertIn("chat_history_str", captured_prompt_kwargs[0])
+        self.assertEqual(captured_prompt_kwargs[0]["chat_history_str"], "")  # Empty on first call
+
+    def test_forward_predecessor_chaining(self):
+        """Test that forward method properly chains predecessors across steps."""
+        from adalflow.optim.parameter import Parameter
+
+        fn1 = DummyFunction(name="search", _is_answer_final=False)
+        fn2 = DummyFunction(name="answer_output", _is_answer_final=True, _answer="predecessor_test")
+
+        class PredecessorPlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                if self.call_count == 1:
+                    data = GeneratorOutput(data=fn1)
+                else:
+                    data = GeneratorOutput(data=fn2)
+
+                param = Parameter(
+                    name=f"step_{self.call_count}",
+                    data=data,
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=PredecessorPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # The final result should have predecessors set up
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertTrue(len(result.predecessors) > 0)  # Should have predecessor
+
+
 class TestRunnerBugFixes(unittest.TestCase):
     """Tests for specific bugs that were found and fixed in the runner implementation."""
diff --git a/adalflow/tests/test_runner_trainer.py b/adalflow/tests/test_runner_trainer.py
new file mode 100644
index 000000000..75f3381a0
--- /dev/null
+++ b/adalflow/tests/test_runner_trainer.py
@@ -0,0 +1,650 @@
+import unittest
+import unittest.mock
+from unittest.mock import Mock, MagicMock, patch
+from types import SimpleNamespace
+from typing import Dict, Any, Tuple, Callable
+
+import adalflow as adal
+from adalflow.components.agent.runner_trainer import (
+    RunnerTrainer,
+    load_datasets,
+    default_eval_fn,
+    default_loss_fn_factory,
+)
+from adalflow.datasets.types import GSM8KData as Example
+from adalflow.core.types import RunnerResult
+from adalflow.optim.parameter import Parameter
+from adalflow.components.agent.react import ReActAgent
+
+
+class TestRunnerTrainer(unittest.TestCase):
+    """Tests for the RunnerTrainer class."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        # Create mock runner
+        self.mock_runner = Mock()
+        self.mock_runner.__class__ = adal.Component
+
+        # Create test sample
+        self.test_sample = Example(
+            id="test_1",
+            question="What is 2 + 2?",
+            answer="4"
+        )
+
+    def test_init_with_defaults(self):
+        """Test RunnerTrainer initialization with default parameters."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Check that defaults are set
+        self.assertEqual(trainer.task, self.mock_runner)
+        self.assertIsNotNone(trainer.eval_fn)
+        self.assertIsNotNone(trainer.loss_fn)
+        self.assertIsNone(trainer.backward_engine_model_config)
+        self.assertIsNone(trainer.teacher_model_config)
+        self.assertIsNone(trainer.text_optimizer_model_config)
+        self.assertFalse(trainer.original_react_agent)
+
+    def test_init_with_custom_parameters(self):
+        """Test RunnerTrainer initialization with custom parameters."""
+        custom_eval_fn = lambda y, y_gt: 0.8
+        custom_loss_fn = Mock()
+        backward_config = {"model": "test"}
+        teacher_config = {"teacher": "test"}
+        text_optimizer_config = {"optimizer": "test"}
+
+        trainer = RunnerTrainer(
+            runner=self.mock_runner,
+            eval_fn=custom_eval_fn,
+            loss_fn=custom_loss_fn,
+            backward_engine_model_config=backward_config,
+            teacher_model_config=teacher_config,
+            text_optimizer_model_config=text_optimizer_config,
+            original_react_agent=True
+        )
+
+        self.assertEqual(trainer.eval_fn, custom_eval_fn)
+        self.assertEqual(trainer.loss_fn, custom_loss_fn)
+        self.assertEqual(trainer.backward_engine_model_config, backward_config)
+        self.assertEqual(trainer.teacher_model_config, teacher_config)
+        self.assertEqual(trainer.text_optimizer_model_config, text_optimizer_config)
+        self.assertTrue(trainer.original_react_agent)
+
+    def test_prepare_task_standard_runner(self):
+        """Test prepare_task method for standard Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        task_fn, kwargs = trainer.prepare_task(self.test_sample)
+
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+        self.assertIn("prompt_kwargs", kwargs)
+        self.assertEqual(kwargs["prompt_kwargs"]["input_str"], "What is 2 + 2?")
+        self.assertEqual(kwargs["id"], "test_1")
+
+    def test_prepare_task_original_react_agent(self):
+        """Test prepare_task method for original ReActAgent."""
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        task_fn, kwargs = trainer.prepare_task(self.test_sample)
+
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+        self.assertIn("input", kwargs)
+        self.assertEqual(kwargs["input"], "What is 2 + 2?")
+        self.assertEqual(kwargs["id"], "test_1")
+
+    def test_prepare_eval_standard_runner(self):
+        """Test prepare_eval method for standard Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create mock RunnerResult
+        mock_result = RunnerResult(answer="4", step_history=[])
+
+        eval_fn, kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(kwargs["y"], "4")
+        self.assertEqual(kwargs["y_gt"], "4")
+
+    def test_prepare_eval_original_react_agent(self):
+        """Test prepare_eval method for original ReActAgent."""
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        prediction = "4"
+
+        eval_fn, kwargs = trainer.prepare_eval(self.test_sample, prediction)
+
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(kwargs["y"], "4")
+        self.assertEqual(kwargs["y_gt"], "4")
+
+    def test_prepare_eval_none_result(self):
+        """Test prepare_eval method with None result."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        eval_fn, kwargs = trainer.prepare_eval(self.test_sample, None)
+
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(kwargs["y"], "")
+        self.assertEqual(kwargs["y_gt"], "4")
+
+    def test_prepare_loss_standard_runner(self):
+        """Test prepare_loss method for standard Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create mock prediction parameter with RunnerResult
+        mock_result = RunnerResult(answer="4", step_history=[])
+        mock_pred = Parameter(
+            name="pred",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        loss_fn, kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        self.assertEqual(loss_fn, trainer.loss_fn)
+        self.assertIn("kwargs", kwargs)
+        self.assertIn("y", kwargs["kwargs"])
+        self.assertIn("y_gt", kwargs["kwargs"])
+        self.assertEqual(kwargs["id"], "test_1")
+
+        # Check that pred.eval_input is set correctly
+        self.assertEqual(mock_pred.eval_input, "4")
+
+        # Check y_gt parameter
+        y_gt = kwargs["kwargs"]["y_gt"]
+        self.assertIsInstance(y_gt, Parameter)
+        self.assertEqual(y_gt.data, "4")
+        self.assertFalse(y_gt.requires_opt)
+
+    def test_prepare_loss_original_react_agent(self):
+        """Test prepare_loss method for original ReActAgent."""
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        # Create mock prediction parameter with string data
+        mock_pred = Parameter(
+            name="pred",
+            data="4",
+            requires_opt=True
+        )
+
+        loss_fn, kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        self.assertEqual(loss_fn, trainer.loss_fn)
+
+        # Check that pred.eval_input is set directly to data
+        self.assertEqual(mock_pred.eval_input, "4")
+
+    @patch('builtins.print')  # Mock print to avoid output during tests
+    def test_prepare_loss_prints_pred(self, mock_print):
+        """Test that prepare_loss prints the prediction parameter."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        mock_result = RunnerResult(answer="4", step_history=[])
+        mock_pred = Parameter(
+            name="pred",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        trainer.prepare_loss(self.test_sample, mock_pred)
+
+        # Verify print was called with the prediction
+        mock_print.assert_called_once_with("pred", mock_pred)
+
+    def test_inheritance_from_adal_component(self):
+        """Test that RunnerTrainer properly inherits from AdalComponent."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        self.assertIsInstance(trainer, adal.AdalComponent)
+        self.assertTrue(hasattr(trainer, 'task'))
+        self.assertTrue(hasattr(trainer, 'eval_fn'))
+        self.assertTrue(hasattr(trainer, 'loss_fn'))
+
+
+class TestRunnerTrainerHelperFunctions(unittest.TestCase):
+    """Tests for helper functions in runner_trainer module."""
+
+    def test_default_eval_fn_exact_match(self):
+        """Test default_eval_fn with exact matches."""
+        result = default_eval_fn("hello", "hello")
+        self.assertEqual(result, 1.0)
+
+        result = default_eval_fn(" hello ", "hello")
+        self.assertEqual(result, 1.0)  # Should strip whitespace
+
+        result = default_eval_fn(42, "42")
+        self.assertEqual(result, 1.0)  # Should convert to string
+
+    def test_default_eval_fn_no_match(self):
+        """Test default_eval_fn with non-matches."""
+        result = default_eval_fn("hello", "world")
+        self.assertEqual(result, 0.0)
+
+        result = default_eval_fn("42", "43")
+        self.assertEqual(result, 0.0)
+
+    def test_default_loss_fn_factory_with_default_eval(self):
+        """Test default_loss_fn_factory with default eval function."""
+        loss_fn = default_loss_fn_factory()
+
+        self.assertIsInstance(loss_fn, adal.EvalFnToTextLoss)
+        # Check that it has the expected description
+        self.assertIn("Default evaluation", loss_fn.eval_fn_desc)
+
+    def test_default_loss_fn_factory_with_custom_eval(self):
+        """Test default_loss_fn_factory with custom eval function."""
+        custom_eval = lambda y, y_gt: 0.5
+        loss_fn = default_loss_fn_factory(custom_eval)
+
+        self.assertIsInstance(loss_fn, adal.EvalFnToTextLoss)
+        # The eval_fn should be wrapped in the loss function
+        self.assertIn("Default evaluation", loss_fn.eval_fn_desc)
+
+    @patch('adalflow.components.agent.runner_trainer.GSM8K')
+    def test_load_datasets(self, mock_gsm8k_class):
+        """Test load_datasets function."""
+        # Mock the GSM8K constructor
+        mock_train = Mock()
+        mock_val = Mock()
+        mock_test = Mock()
+
+        def mock_gsm8k_side_effect(split, size):
+            if split == "train":
+                return mock_train
+            elif split == "val":
+                return mock_val
+            elif split == "test":
+                return mock_test
+
+        mock_gsm8k_class.side_effect = mock_gsm8k_side_effect
+
+        train_data, val_data, test_data = load_datasets()
+
+        # Check that GSM8K was called with correct parameters
+        expected_calls = [
+            unittest.mock.call(split="train", size=100),
+            unittest.mock.call(split="val", size=50),
+            unittest.mock.call(split="test", size=100)
+        ]
+        mock_gsm8k_class.assert_has_calls(expected_calls)
+
+        # Check return values
+        self.assertEqual(train_data, mock_train)
+        self.assertEqual(val_data, mock_val)
+        self.assertEqual(test_data, mock_test)
+
+
+class TestRunnerTrainerIntegration(unittest.TestCase):
+    """Integration tests for RunnerTrainer with mocked dependencies."""
+
+    def setUp(self):
+        """Set up integration test fixtures."""
+        self.mock_runner = Mock(spec=adal.Component)
+        self.test_sample = Example(
+            id="integration_test",
+            question="What is the capital of France?",
+            answer="Paris"
+        )
+
+    def test_full_workflow_standard_runner(self):
+        """Test complete workflow with standard Runner."""
+        # Create trainer
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test prepare_task
+        task_fn, task_kwargs = trainer.prepare_task(self.test_sample)
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+
+        # Simulate runner execution
+        mock_result = RunnerResult(answer="Paris", step_history=[])
+
+        # Test prepare_eval
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Execute evaluation
+        eval_score = eval_fn(**eval_kwargs)
+        self.assertEqual(eval_score, 1.0)  # Should match exactly
+
+        # Test prepare_loss with Parameter
+        mock_pred = Parameter(
+            name="prediction",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        loss_fn, loss_kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        # Verify loss preparation
+        self.assertIn("kwargs", loss_kwargs)
+        self.assertEqual(loss_kwargs["id"], "integration_test")
+
+    def test_full_workflow_original_react_agent(self):
+        """Test complete workflow with original ReActAgent."""
+        # Create trainer for original ReActAgent
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        # Test prepare_task
+        task_fn, task_kwargs = trainer.prepare_task(self.test_sample)
+        self.assertIn("input", task_kwargs)
+        self.assertEqual(task_kwargs["input"], "What is the capital of France?")
+
+        # Test prepare_eval with string prediction
+        prediction = "Paris"
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, prediction)
+
+        # Execute evaluation
+        eval_score = eval_fn(**eval_kwargs)
+        self.assertEqual(eval_score, 1.0)
+
+        # Test prepare_loss
+        mock_pred = Parameter(
+            name="prediction",
+            data="Paris",
+            requires_opt=True
+        )
+
+        loss_fn, loss_kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        # Verify eval_input is set directly to data
+        self.assertEqual(mock_pred.eval_input, "Paris")
+
+    def test_error_handling_in_eval(self):
+        """Test error handling in evaluation methods."""
+        # Create trainer with faulty eval function
+        def faulty_eval(y, y_gt):
+            raise ValueError("Evaluation error")
+
+        trainer = RunnerTrainer(runner=self.mock_runner, eval_fn=faulty_eval)
+
+        mock_result = RunnerResult(answer="Test", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should propagate the error
+        with self.assertRaises(ValueError):
+            eval_fn(**eval_kwargs)
+
+    def test_custom_configurations(self):
+        """Test that custom configurations are preserved."""
+        backward_config = {"model_client": "test_backward"}
+        teacher_config = {"model_client": "test_teacher"}
+        text_optimizer_config = {"model_client": "test_optimizer"}
+
+        trainer = RunnerTrainer(
+            runner=self.mock_runner,
+            backward_engine_model_config=backward_config,
+            teacher_model_config=teacher_config,
+            text_optimizer_model_config=text_optimizer_config
+        )
+
+        self.assertEqual(trainer.backward_engine_model_config, backward_config)
+        self.assertEqual(trainer.teacher_model_config, teacher_config)
+        self.assertEqual(trainer.text_optimizer_model_config, text_optimizer_config)
+
+
+class TestRunnerTrainerEdgeCases(unittest.TestCase):
+    """Tests for edge cases and error conditions."""
+
+    def setUp(self):
+        """Set up edge case test fixtures."""
+        self.mock_runner = Mock(spec=adal.Component)
+
+    def test_empty_answer(self):
+        """Test handling of empty answers."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        empty_sample = Example(
+            id="empty_test",
+            question="Test question?",
+            answer=""
+        )
+
+        # Test with empty RunnerResult answer
+        mock_result = RunnerResult(answer="", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(empty_sample, mock_result)
+
+        # Should match empty string
+        result = eval_fn(**eval_kwargs)
+        self.assertEqual(result, 1.0)
+
+    def test_none_runner_result_answer(self):
+        """Test handling of RunnerResult with None answer."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        test_sample = Example(
+            id="none_test",
+            question="Test question?",
+            answer="expected"
+        )
+
+        # Test with RunnerResult having None answer
+        mock_result = RunnerResult(answer=None, step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(test_sample, mock_result)
+
+        # Should use empty string for None answer
+        self.assertEqual(eval_kwargs["y"], "")
+
+    def test_special_characters_in_answers(self):
+        """Test handling of special characters in answers."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        special_sample = Example(
+            id="special_test",
+            question="What is the result?",
+            answer="$100.50"
+        )
+
+        mock_result = RunnerResult(answer="$100.50", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(special_sample, mock_result)
+
+        result = eval_fn(**eval_kwargs)
+        self.assertEqual(result, 1.0)
+
+    def test_whitespace_normalization(self):
+        """Test that whitespace is properly normalized in evaluation."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        test_sample = Example(
+            id="whitespace_test",
+            question="Test?",
+            answer="answer"
+        )
+
+        # Test with extra whitespace
+        mock_result = RunnerResult(answer="  answer  ", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(test_sample, mock_result)
+
+        result = eval_fn(**eval_kwargs)
+        self.assertEqual(result, 1.0)  # Should match after stripping
+
+
+class TestRunnerTrainerNewRunnerFeatures(unittest.TestCase):
+    """Tests for RunnerTrainer compatibility with new Runner features."""
+
+    def setUp(self):
+        """Set up test fixtures for new Runner features."""
+        self.mock_runner = Mock(spec=adal.Component)
+        self.test_sample = Example(
+            id="new_feature_test",
+            question="Test with new runner?",
+            answer="New runner works"
+        )
+
+    def test_runner_trainer_with_training_flag(self):
+        """Test RunnerTrainer works with Runner that has training parameter."""
+        # Create a mock runner that simulates training behavior
+        mock_runner = Mock(spec=adal.Component)
+        mock_runner.training = False
+
+        trainer = RunnerTrainer(runner=mock_runner)
+
+        # Test that trainer initialization works with training-capable runners
+        self.assertEqual(trainer.task, mock_runner)
+        self.assertIsNotNone(trainer.eval_fn)
+        self.assertIsNotNone(trainer.loss_fn)
+
+    def test_runner_result_with_new_fields(self):
+        """Test handling of RunnerResult with potentially new fields."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create RunnerResult with standard fields plus potential new ones
+        mock_result = RunnerResult(
+            answer="test answer",
+            step_history=[],
+            error=None,
+            id="test_id_123"
+        )
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should handle RunnerResult regardless of additional fields
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(eval_kwargs["y"], "test answer")
+        self.assertEqual(eval_kwargs["y_gt"], "New runner works")
+
+    def test_parameter_handling_with_new_runner_output(self):
+        """Test Parameter creation with new Runner output types."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test with OutputParameter (which is now potentially returned by new Runner)
+        from adalflow.optim.parameter import OutputParameter
+
+        # Create a mock RunnerResult
+        mock_result = RunnerResult(answer="output parameter test", step_history=[])
+
+        # Create an OutputParameter wrapping the result (as new Runner might return)
+        mock_output_param = OutputParameter(
+            name="runner_output",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        # Test prepare_loss can handle OutputParameter input
+        loss_fn, loss_kwargs = trainer.prepare_loss(self.test_sample, mock_output_param)
+
+        self.assertEqual(loss_fn, trainer.loss_fn)
+        self.assertIn("kwargs", loss_kwargs)
+        self.assertEqual(loss_kwargs["id"], "new_feature_test")
+
+        # Check eval_input is set correctly for OutputParameter
+        self.assertEqual(mock_output_param.eval_input, "output parameter test")
+
+    def test_runner_trainer_initialization_with_new_runner_kwargs(self):
+        """Test RunnerTrainer handles any new Runner constructor parameters gracefully."""
+        # Test with various configurations that might be used with new Runner
+        configs = [
+            {"training": True},
+            {"conversation_memory": None},
+            {"permission_manager": None},
+            {"ctx": {"test": "context"}},
+        ]
+
+        for config in configs:
+            with self.subTest(config=config):
+                # Mock runner should accept any additional parameters
+                mock_runner = Mock(spec=adal.Component)
+                for key, value in config.items():
+                    setattr(mock_runner, key, value)
+
+                trainer = RunnerTrainer(runner=mock_runner)
+
+                # Should initialize successfully regardless of runner's additional features
+                self.assertIsNotNone(trainer.task)
+                self.assertIsNotNone(trainer.eval_fn)
+                self.assertIsNotNone(trainer.loss_fn)
+
+    def test_step_history_handling_with_enhanced_runner(self):
+        """Test that step history handling works with potentially enhanced Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create mock step history with potentially new StepOutput format
+        mock_step = Mock()
+        mock_step.step = 1
+        mock_step.function = Mock()
+        mock_step.function.name = "test_function"
+        mock_step.observation = "test observation"
+
+        mock_result = RunnerResult(
+            answer="final answer",
+            step_history=[mock_step],
+            error=None
+        )
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should extract answer correctly regardless of step history format
+        self.assertEqual(eval_kwargs["y"], "final answer")
+
+    def test_error_handling_with_new_runner_exceptions(self):
+        """Test error handling with potentially new Runner exception types."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test with RunnerResult containing error information
+        mock_result = RunnerResult(
+            answer=None,
+            step_history=[],
+            error="New runner error type"
+        )
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should handle error cases gracefully
+        self.assertEqual(eval_kwargs["y"], "")  # Empty string for None answer
+        self.assertEqual(eval_kwargs["y_gt"], "New runner works")
+
+    def test_context_preservation_with_new_runner(self):
+        """Test that context and metadata are preserved with new Runner features."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test prepare_task with additional context that new Runner might use
+        sample_with_metadata = Example(
+            id="context_test",
+            question="Test with context?",
+            answer="Context preserved"
+        )
+
+        task_fn, kwargs = trainer.prepare_task(sample_with_metadata)
+
+        # Should preserve all necessary information for new Runner
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+        self.assertIn("prompt_kwargs", kwargs)
+        self.assertEqual(kwargs["prompt_kwargs"]["input_str"], "Test with context?")
+        self.assertEqual(kwargs["id"], "context_test")
+
+    def test_backward_compatibility_with_legacy_runner_features(self):
+        """Test that RunnerTrainer maintains backward compatibility."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test with "old style" runner result
+        old_style_result = RunnerResult(answer="legacy answer", step_history=[])
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, old_style_result)
+
+        # Should work exactly as before
+        eval_result = eval_fn(**eval_kwargs)
+        # This should fail since answers don't match, proving evaluation works
+        self.assertEqual(eval_result, 0.0)  # "legacy answer" != "New runner works"
+
+    def test_concurrent_runner_execution_support(self):
+        """Test that RunnerTrainer supports potential concurrent execution features."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test multiple samples in sequence (simulating batch processing)
+        samples = [
+            Example(id=f"concurrent_{i}", question=f"Question {i}?", answer=f"Answer {i}")
+            for i in range(3)
+        ]
+
+        results = []
+        for sample in samples:
+            mock_result = RunnerResult(answer=f"Answer {sample.id.split('_')[1]}", step_history=[])
+            eval_fn, eval_kwargs = trainer.prepare_eval(sample, mock_result)
+            result = eval_fn(**eval_kwargs)
+            results.append(result)
+
+        # All should evaluate correctly
+        self.assertEqual(results, [1.0, 1.0, 1.0])
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py b/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
index 370ec97e5..9419a1517 100644
--- a/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
+++ b/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
@@ -3,6 +3,10 @@
 import adalflow as adal
 from adalflow.eval.answer_match_acc import AnswerMatchAcc
 from adalflow.datasets.types import HotPotQAData
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
 
 from benchmarks.hotpot_qa.config import load_datasets
 from benchmarks.hotpot_qa.adal_exp.build_multi_hop_rag import AgenticRAG
diff --git a/notebooks/tutorials/adalflow_classification_optimization.ipynb b/notebooks/tutorials/adalflow_classification_optimization.ipynb
index e83980d14..733630545 100644
--- a/notebooks/tutorials/adalflow_classification_optimization.ipynb
+++ b/notebooks/tutorials/adalflow_classification_optimization.ipynb
@@ -904,11 +904,13 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "adalflow-project-Y74mvs4e-py3.12",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
-   "name": "python"
+   "name": "python",
"name": "python", + "version": "3.12.7" } }, "nbformat": 4, diff --git a/notebooks/tutorials/adalflow_rag_optimization.ipynb b/notebooks/tutorials/adalflow_rag_optimization.ipynb index c05d9ad47..8cca7fd7b 100644 --- a/notebooks/tutorials/adalflow_rag_optimization.ipynb +++ b/notebooks/tutorials/adalflow_rag_optimization.ipynb @@ -497,11 +497,13 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "adalflow-project-Y74mvs4e-py3.12", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.12.7" } }, "nbformat": 4, diff --git a/poetry.lock b/poetry.lock index 308f6d5e3..db6aa3d2b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -114,7 +114,7 @@ version = "2.6.1" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -126,7 +126,7 @@ version = "3.12.15" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "aiohttp-3.12.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b6fc902bff74d9b1879ad55f5404153e2b33a82e72a95c89cec5eb6cc9e92fbc"}, {file = "aiohttp-3.12.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:098e92835b8119b54c693f2f88a1dec690e20798ca5f5fe5f0520245253ee0af"}, @@ -250,7 +250,7 @@ version = "1.4.0" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, @@ -1324,7 +1324,7 @@ version = "4.0.0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.9.0" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d"}, {file = "datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1"}, @@ -1426,7 +1426,7 @@ version = "0.3.8" description = "serialize all of Python" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, @@ -1735,7 +1735,7 @@ version = "3.18.0" description = "A platform independent file lock." 
optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, @@ -1875,7 +1875,7 @@ version = "1.7.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"}, {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"}, @@ -1989,7 +1989,7 @@ version = "2025.3.0" description = "File-system specification" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"}, {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"}, @@ -2291,7 +2291,7 @@ version = "0.21" description = "Simple Python interface for Graphviz" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42"}, {file = "graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78"}, @@ -2528,7 +2528,7 @@ version = "1.1.5" description = "Fast transfer of large files with the Hugging Face Hub." 
optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" files = [ {file = "hf_xet-1.1.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f52c2fa3635b8c37c7764d8796dfa72706cc4eded19d638331161e82b0792e23"}, @@ -2643,7 +2643,7 @@ version = "0.34.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "huggingface_hub-0.34.3-py3-none-any.whl", hash = "sha256:5444550099e2d86e68b2898b09e85878fbd788fc2957b506c6a79ce060e39492"}, {file = "huggingface_hub-0.34.3.tar.gz", hash = "sha256:d58130fd5aa7408480681475491c0abd7e835442082fbc3ef4d45b6c39f83853"}, @@ -3104,6 +3104,25 @@ files = [ {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, ] +[[package]] +name = "jsonpickle" +version = "4.1.1" +description = "jsonpickle encodes/decodes any Python object to/from JSON" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "jsonpickle-4.1.1-py3-none-any.whl", hash = "sha256:bb141da6057898aa2438ff268362b126826c812a1721e31cf08a6e142910dc91"}, + {file = "jsonpickle-4.1.1.tar.gz", hash = "sha256:f86e18f13e2b96c1c1eede0b7b90095bbb61d99fedc14813c44dc2f361dbbae1"}, +] + +[package.extras] +cov = ["pytest-cov"] +dev = ["black", "pyupgrade"] +docs = ["furo", "rst.linker (>=1.9)", "sphinx (>=3.5)"] +packaging = ["build", "setuptools (>=61.2)", "setuptools_scm[toml] (>=6.0)", "twine"] +testing = ["PyYAML", "atheris (>=2.3.0,<2.4.0) ; python_version < \"3.12\"", "bson", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "pytest (>=6.0,!=8.1.*)", "pytest-benchmark", "pytest-benchmark[histogram]", "pytest-checkdocs (>=1.2.3)", "pytest-enabler (>=1.0.1)", "pytest-ruff (>=0.2.1)", "scikit-learn", "scipy (>=1.9.3) ; python_version > \"3.10\"", "scipy ; python_version <= \"3.10\"", "simplejson", "sqlalchemy", "ujson"] + [[package]] name = "jsonpointer" version = "3.0.0" @@ -4214,7 +4233,7 @@ version = "6.6.3" description = "multidict implementation" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2be5b7b35271f7fff1397204ba6708365e3d773579fe2a30625e16c4b4ce817"}, {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12f4581d2930840295c461764b9a65732ec01250b46c6b2c510d7ee68872b140"}, @@ -4334,7 +4353,7 @@ version = "0.70.16" description = "better multiprocessing and multithreading in Python" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, @@ -4465,7 +4484,7 @@ version = "3.5" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.11" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "networkx-3.5-py3-none-any.whl", hash = 
"sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec"}, {file = "networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037"}, @@ -5116,7 +5135,7 @@ version = "2.3.1" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, @@ -5479,7 +5498,7 @@ version = "0.3.2" description = "Accelerated property cache" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"}, {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"}, @@ -5678,7 +5697,7 @@ version = "20.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pyarrow-20.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c7dd06fd7d7b410ca5dc839cc9d485d2bc4ae5240851bcd45d85105cc90a47d7"}, {file = "pyarrow-20.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d5382de8dc34c943249b01c19110783d0d64b207167c728461add1ecc2db88e4"}, @@ -6079,7 +6098,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -6136,12 +6155,29 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] +[[package]] +name = "pyvis" +version = "0.3.2" +description = "A Python network graph visualization library" +optional = false +python-versions = ">3.6" +groups = ["main"] +files = [ + {file = "pyvis-0.3.2-py3-none-any.whl", hash = "sha256:5720c4ca8161dc5d9ab352015723abb7a8bb8fb443edeb07f7a322db34a97555"}, +] + +[package.dependencies] +ipython = ">=5.3.0" +jinja2 = ">=2.9.6" +jsonpickle = ">=1.4.1" +networkx = ">=1.11" + [[package]] name = "pywin32" version = "311" @@ -7049,7 +7085,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -7880,7 +7916,6 @@ files = [ 
{file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, {file = "typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, ] -markers = {main = "python_version == \"3.11\""} [[package]] name = "typing-inspect" @@ -7919,7 +7954,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -8220,7 +8255,7 @@ version = "3.5.0" description = "Python binding for xxHash" optional = false python-versions = ">=3.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"}, {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"}, @@ -8353,7 +8388,7 @@ version = "1.20.1" description = "Yet another URL library" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"}, {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"}, @@ -8489,4 +8524,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.11, <4.0" -content-hash = "6fcfca26063930dfdb7d9c8aade6b90293abeb0ceb205861bc52a23f0b52188e" +content-hash = "b7d22133e9fafd9adb0b4c5b65bf36e392c8e975e4fcdee90eeb74288e8971b7" diff --git a/pyproject.toml b/pyproject.toml index f90e32269..fe9579a85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,9 @@ adalflow = { path = "adalflow", develop = true } black = { extras = ["jupyter"], version = "^24.10.0" } aioitertools = "^0.12.0" asyncstdlib = "^3.13.1" +datasets = "^4.0.0" +pyvis = "^0.3.2" +graphviz = "^0.21" [tool.poetry.group.dev.dependencies] diff --git a/tutorials/v1_agent_runner/runner_trainable_tutorial.py b/tutorials/v1_agent_runner/runner_trainable_tutorial.py new file mode 100644 index 000000000..d6660df68 --- /dev/null +++ b/tutorials/v1_agent_runner/runner_trainable_tutorial.py @@ -0,0 +1,198 @@ +""" +Tutorial: Using Runner.forward() Method to Get Trainable Results + +This tutorial demonstrates how to use the Runner's forward() method to get +trainable outputs that can be used for optimization. The key difference between +runner.call() and runner.forward() is that forward() returns trainable Parameter +objects that maintain gradient information for optimization. 
+""" + +import adalflow as adal +from adalflow.core.types import RunnerResult +from adalflow.optim.parameter import OutputParameter +from adalflow.utils import setup_env + +setup_env() + + +def create_simple_agent(): + """Create a simple agent for demonstration.""" + # Configure model client + model_config = { + "model_client": adal.AnthropicAPIClient(), + "model_kwargs": { + "model": "claude-3-5-sonnet-20241022", + "temperature": 0.0, + } + } + + # Create agent with simple tools + agent = adal.Agent( + name="demo_agent", + add_llm_as_fallback=True, + max_steps=3, + **model_config + ) + + return agent + + +def example_1_basic_forward_usage(): + """Example 1: Basic usage of runner.forward() method.""" + print("=== Example 1: Basic Forward Usage ===") + + agent = create_simple_agent() + runner = adal.Runner(agent=agent, training=True) # Important: set training=True + + # Use forward() instead of call() to get trainable results + prompt_kwargs = { + "input_str": "What is 2+2? Use the finish action to provide your final answer." + } + + # This returns an OutputParameter with gradient information + trainable_result = runner.forward(prompt_kwargs=prompt_kwargs) + + print(f"Result type: {type(trainable_result)}") + print(f"Is trainable (requires_opt): {trainable_result.requires_opt}") + print(f"Has gradient function: {trainable_result.grad_fn is not None}") + + # Access the actual RunnerResult from the parameter + runner_result: RunnerResult = trainable_result.data + print(f"Final answer: {runner_result.answer}") + print(f"Number of steps: {len(runner_result.step_history)}") + + return trainable_result + + +def example_2_extracting_final_output(): + """Example 2: How to extract the final output from trainable result.""" + print("\n=== Example 2: Extracting Final Output ===") + + agent = create_simple_agent() + runner = adal.Runner(agent=agent, training=True) + + prompt_kwargs = { + "input_str": "Calculate 15 * 3 and explain your reasoning. Use finish to provide your final answer." + } + + # Get trainable result + trainable_result = runner.forward(prompt_kwargs=prompt_kwargs) + + # Method 1: Access through .data attribute + final_answer = trainable_result.data.answer + print(f"Method 1 - Final answer via .data: {final_answer}") + + # Method 2: Access step history for detailed breakdown + step_history = trainable_result.data.step_history + print(f"Method 2 - Step history length: {len(step_history)}") + for i, step in enumerate(step_history): + print(f" Step {i}: {step.function.name if step.function else 'None'} -> {step.observation}") + + # Method 3: Check if there were any errors + if trainable_result.data.error: + print(f"Method 3 - Error occurred: {trainable_result.data.error}") + else: + print("Method 3 - No errors occurred") + + return trainable_result + + +def example_3_comparison_call_vs_forward(): + """Example 3: Comparing runner.call() vs runner.forward().""" + print("\n=== Example 3: Call vs Forward Comparison ===") + + agent = create_simple_agent() + runner = adal.Runner(agent=agent) + + prompt_kwargs = { + "input_str": "What is the capital of France? Use finish to provide your final answer." 
+
+    # Using call() - returns RunnerResult directly (not trainable)
+    runner.training = False  # Inference mode
+    call_result = runner(prompt_kwargs=prompt_kwargs)
+
+    print("Call Result:")
+    print(f"  Type: {type(call_result)}")
+    print(f"  Answer: {call_result.answer}")
+    print("  Is trainable: No (RunnerResult object)")
+
+    # Using forward() - returns a trainable OutputParameter
+    runner.training = True  # Training mode
+    forward_result = runner.forward(prompt_kwargs=prompt_kwargs)
+
+    print("\nForward Result:")
+    print(f"  Type: {type(forward_result)}")
+    print(f"  Answer: {forward_result.data.answer}")
+    print(f"  Is trainable: {forward_result.requires_opt}")
+    print(f"  Has predecessors: {len(forward_result.predecessors) > 0}")
+
+    return call_result, forward_result
+
+
+def example_4_gradient_tracking():
+    """Example 4: Understanding gradient tracking in trainable results."""
+    print("\n=== Example 4: Gradient Tracking ===")
+
+    agent = create_simple_agent()
+    runner = adal.Runner(agent=agent, training=True)
+
+    prompt_kwargs = {
+        "input_str": "Solve: If John has 5 apples and gives away 2, how many does he have left?"
+    }
+
+    # Get trainable result
+    trainable_result = runner.forward(prompt_kwargs=prompt_kwargs)
+
+    print("Gradient Information:")
+    print(f"  Has grad_fn: {trainable_result.grad_fn is not None}")
+    print(f"  Requires optimization: {trainable_result.requires_opt}")
+    print(f"  Parameter name: {trainable_result.name}")
+    print(f"  Number of predecessors: {len(trainable_result.predecessors)}")
+
+    # The grad_fn holds the backward context used during optimization
+    if trainable_result.grad_fn:
+        backward_ctx = trainable_result.grad_fn
+        print(f"  Backward context: {backward_ctx}")
+        print(f"  Backward function available: {backward_ctx.backward_fn is not None}")
+        print(f"  Template available: {backward_ctx.kwargs['template'] is not None}")
+        print(f"  Prompt string length: {len(backward_ctx.kwargs['prompt_str']) if backward_ctx.kwargs['prompt_str'] else 0}")
+
+    return trainable_result
+
+
+def main():
+    """Run all examples."""
+    print("Tutorial: Runner.forward() for Trainable Results")
+    print("=" * 50)
+
+    try:
+        # Run examples
+        example_1_basic_forward_usage()
+        example_2_extracting_final_output()
+        example_3_comparison_call_vs_forward()
+        example_4_gradient_tracking()
+
+        print("\n" + "=" * 50)
+        print("Tutorial completed successfully!")
+        print("\nKey takeaways:")
+        print("1. Use runner.forward() instead of runner.call() for trainable results")
+        print("2. Set training=True when creating the Runner for optimization")
+        print("3. forward() returns an OutputParameter with gradient information")
+        print("4. Trainable results can be used with AdalFlow's optimization system")
+
+    except Exception as e:
+        print(f"Error occurred during tutorial: {e}")
+        print("Make sure you have:")
+        print("1. Set up your API key (ANTHROPIC_API_KEY)")
+        print("2. Installed AdalFlow with all dependencies")
+        return False
+
+    return True
+
+
+if __name__ == "__main__":
+    # setup_env() already runs at import time; ensure ANTHROPIC_API_KEY is configured.
+    main()
\ No newline at end of file
diff --git a/use_cases/question_answering/gsm8k/train.py b/use_cases/question_answering/gsm8k/train.py
index 63fc2ef40..cf3b764d8 100644
--- a/use_cases/question_answering/gsm8k/train.py
+++ b/use_cases/question_answering/gsm8k/train.py
@@ -21,8 +21,9 @@ def __init__(
         backward_engine_model_config: Optional[Dict] = None,
         teacher_model_config: Optional[Dict] = None,
         text_optimizer_model_config: Optional[Dict] = None,
+        config: Optional[Dict] = None,
     ):
-        task = GSM8KTask(**gpt_3_model)
+        task = GSM8KTask(**config)
         eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
         loss_fn = adal.EvalFnToTextLoss(
             eval_fn=eval_fn,
@@ -66,16 +67,29 @@ def prepare_loss(
 def train():
     train_data, val_data, test_data = load_datasets()
-    task = GSM8KTask(**gpt_3_model)
+
+    # Create Anthropic client configuration instead of GPT-3
+    anthropic_config = {
+        "model_client": adal.AnthropicAPIClient(),
+        "model_kwargs": {
+            "model": "claude-3-5-sonnet-20241022",
+            # "max_tokens": 2000,
+            "temperature": 0.0,
+        }
+    }
+
+    task = GSM8KTask(**anthropic_config)
     adal_component = StandardTrain(
         task=task,
-        backward_engine_model_config=gpt_o3_mini_model,
-        text_optimizer_model_config=gpt_o3_mini_model,
+        backward_engine_model_config=anthropic_config,
+        text_optimizer_model_config=anthropic_config,
+        config=anthropic_config,
     )
     trainer = adal.Trainer(
         adaltask=adal_component,
         strategy="random",
-        max_steps=10,
+        # max_steps=10,
+        max_steps=1,
         text_optimizers_config_kwargs={"max_past_history": 5},
     )
     trainer.fit(
@@ -83,7 +97,8 @@ def train():
         val_dataset=val_data,
         test_dataset=test_data,
         debug=False,
-        resume_from_ckpt="/Users/liyin/.adalflow/ckpt/StandardTrain/random_max_steps_10_57bb8_run_1.json",
+        # resume_from_ckpt=None,
+        # resume_from_ckpt="/Users/jinhakim/.adalflow/ckpt/StandardTrain/random_max_steps_1_e35e9_run_1.json",
     )
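
As a closing note on the new training path: below is a minimal sketch (not part of the diff above) showing how the trainable OutputParameter from runner.forward() could feed the same AnswerMatchAcc/EvalFnToTextLoss pair that use_cases/question_answering/gsm8k/train.py uses. The Parameter constructor arguments and the loss_fn(kwargs=..., id=...) call shape follow what the RunnerTrainer tests assert; treat the exact signatures as assumptions rather than settled API.

import adalflow as adal
from adalflow.eval.answer_match_acc import AnswerMatchAcc
from adalflow.utils import setup_env

setup_env()  # expects ANTHROPIC_API_KEY in the environment

# Agent and Runner in training mode, matching the tutorial above.
agent = adal.Agent(
    name="demo_agent",
    model_client=adal.AnthropicAPIClient(),
    model_kwargs={"model": "claude-3-5-sonnet-20241022", "temperature": 0.0},
    add_llm_as_fallback=True,
    max_steps=3,
)
runner = adal.Runner(agent=agent, training=True)

# forward() returns an OutputParameter whose .data is a RunnerResult.
pred = runner.forward(prompt_kwargs={"input_str": "What is 2+2? Use finish to answer."})
pred.eval_input = pred.data.answer  # the value the eval/loss functions score

# Ground truth wrapped as a non-trainable Parameter (assumed constructor shape).
gt = adal.Parameter(name="y_gt", data="4", eval_input="4", requires_opt=False)

eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
loss_fn = adal.EvalFnToTextLoss(
    eval_fn=eval_fn,
    eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
)

# Same call shape the RunnerTrainer tests check: kwargs={"y": ..., "y_gt": ...} plus id.
loss = loss_fn(kwargs={"y": pred, "y_gt": gt}, id="demo_1")
loss.backward()  # propagate textual gradients back through the runner's graph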