diff --git a/adalflow/adalflow/components/agent/agent.py b/adalflow/adalflow/components/agent/agent.py
index 3a939f118..9cfb9ae61 100644
--- a/adalflow/adalflow/components/agent/agent.py
+++ b/adalflow/adalflow/components/agent/agent.py
@@ -132,7 +132,7 @@ def create_default_planner(
     cache_path: Optional[str] = None,
     use_cache: Optional[bool] = False,
     # default agent parameters
-    max_steps: Optional[int] = 10,
+    max_steps: Optional[int] = 3,
     is_thinking_model: Optional[bool] = False,
     answer_data_type: Optional[Type[T]] = str,
     **kwargs,
@@ -259,8 +259,8 @@ def __init__(
     use_cache: Optional[bool] = True,
     # default agent parameters
     answer_data_type: Optional[Type[T]] = str,  # the data type of the final answer
-    max_steps: Optional[int] = 10,
-    is_thinking_model: Optional[bool] = False,  # when thinking model turned on, it disables the CoT field in the output
+    max_steps: Optional[int] = DEFAULT_MAX_STEPS,
+    is_thinking_model: Optional[bool] = False,  # support thinking model in agent
     # for fully customize the agent
     tool_manager: Optional[
         ToolManager
diff --git a/adalflow/adalflow/components/agent/react.py b/adalflow/adalflow/components/agent/react.py
index c5e3d5977..d8295981b 100644
--- a/adalflow/adalflow/components/agent/react.py
+++ b/adalflow/adalflow/components/agent/react.py
@@ -559,7 +559,7 @@ def _run_one_step(
         )
 
         log.debug(
-            f"Running step {step} with prompt: {self.planner.prompt(**prompt_kwargs)}"
+            f"Running step {step} with prompt: {self.planner.get_prompt(**prompt_kwargs)}"
         )
 
         try:
@@ -682,7 +682,8 @@ def _get_answer(
             step_history=step_history,
             id=last_step.data_id,
             react_agent_task_desc=self.planner.prompt_kwargs[
-                "react_agent_task_desc"
+                # "react_agent_task_desc"
+                "task_desc"
             ],
         )
 
@@ -720,7 +721,7 @@ def _is_step_output_last_step(self, step_output: StepOutput) -> bool:
     def bicall(
         self,
         input: str,  # open up to the external
-        promt_kwargs: Optional[Dict] = {},
+        prompt_kwargs: Optional[Dict] = {},
         model_kwargs: Optional[Dict] = {},
         id: Optional[str] = None,
     ) -> Union["Parameter", ReActOutput]:
@@ -729,7 +730,7 @@ def bicall(
 
         # set up the prompts
         prompt_kwargs = {
-            **promt_kwargs,
+            **prompt_kwargs,
             "input_str": input,
         }
diff --git a/adalflow/adalflow/components/agent/runner.py b/adalflow/adalflow/components/agent/runner.py
index e6aa51818..b6d1e3067 100644
--- a/adalflow/adalflow/components/agent/runner.py
+++ b/adalflow/adalflow/components/agent/runner.py
@@ -20,10 +20,12 @@
 
 from typing_extensions import TypeAlias
 
-from adalflow.optim.parameter import Parameter
+from adalflow.optim.parameter import Parameter, ParameterType, OutputParameter
 from adalflow.utils import printc
 from adalflow.core.component import Component
 from adalflow.components.agent.agent import Agent
+from adalflow.optim.function import BackwardContext
+
 
 from adalflow.core.types import (
     GeneratorOutput,
@@ -53,6 +55,7 @@
     response_span,
     step_span,
 )
+from adalflow.optim.grad_component import GradComponent
 
 __all__ = ["Runner"]
 
@@ -68,9 +71,26 @@
 ]  # Replace with your actual Adalflow dataclass type if available
 
 
+class CombineStepHistoryAndRunnerResult(GradComponent):
+    def __init__(self):
+        super().__init__(desc="Extract the final answer from the step history.")
+
+    def call(
+        self,
+        step_history: List[StepOutput],
+        runner_result: RunnerResult,
+        task_desc: str,  # skip connection
+        id: Optional[str] = None,
+    ) -> str:
+        if not runner_result:
+            return ""
+        # answer = step_history[-1].observation
+        answer = runner_result.answer
+        return answer
+
+
 # The runner will create tool call request, add a unique call id.
 # TODO: move this to repo adalflow/agent
-class Runner(Component):
+class Runner(GradComponent):
     """Executes Agent instances with multi-step iterative planning and tool execution.
 
     The Runner orchestrates the execution of an Agent through multiple reasoning and action
@@ -106,6 +126,7 @@ def __init__(
         max_steps: Optional[int] = None,  # this will overwrite the agent's max_steps
         permission_manager: Optional[PermissionManager] = None,
         conversation_memory: Optional[ConversationMemory] = None,
+        training: Optional[bool] = False,
         **kwargs,
     ) -> None:
         """Initialize runner with an agent and configuration.
@@ -118,7 +139,9 @@ def __init__(
             permission_manager: Optional permission manager for tool approval
             conversation_memory: Optional conversation memory
+            training: If True, __call__ dispatches to the trainable forward path
         """
-        super().__init__(**kwargs)
+        Component.__init__(self, **kwargs)
+        GradComponent.__init__(self, desc="Generate a response using LLM model.", **kwargs)
+        # creates a backward engine if it is not passed in the kwargs
         self.agent = agent
         self.tool_manager = agent.tool_manager
         self.permission_manager = permission_manager
@@ -137,6 +160,7 @@ def __init__(
         else:
             # overwrite the agent's max_steps
             self.agent.max_steps = max_steps
+
         self.answer_data_type = agent.answer_data_type or str
 
         self.step_history: List[StepOutput] = []
@@ -153,6 +177,11 @@ def __init__(
         self._cancel_callbacks = []
         self._current_task = None  # Track the current running task
         self._current_streaming_result = None  # Track the current streaming result
+        self.training = training
+
+        # combine step history
+        self.combine_step_history_and_runner_result = CombineStepHistoryAndRunnerResult()
+
         # support thinking model
         self.is_thinking_model = agent.is_thinking_model if hasattr(agent, 'is_thinking_model') else False
@@ -184,8 +213,6 @@ def set_permission_manager(
         if permission_manager is not None:
             permission_manager.set_tool_manager(self.tool_manager)
 
-
-
     def is_cancelled(self) -> bool:
         """Check if execution has been cancelled."""
         return self._cancelled
@@ -207,15 +234,19 @@ async def cancel(self) -> None:
         self._cancelled = True
 
         # Try to emit a test event if we have a streaming result
-        if hasattr(self, '_current_streaming_result') and self._current_streaming_result:
+        if (
+            hasattr(self, "_current_streaming_result")
+            and self._current_streaming_result
+        ):
             try:
                 cancel_received_event = RunItemStreamEvent(
                     name="runner.cancel_received",
                     item=FinalOutputItem(
                         data={
-                            "status": "cancel_received",
-                            "message": "Cancel request received",
-                        })
+                            "status": "cancel_received",
+                            "message": "Cancel request received",
+                        }
+                    ),
                 )
                 self._current_streaming_result.put_nowait(cancel_received_event)
                 log.info("Emitted cancel_received event")
@@ -235,10 +266,7 @@ async def _wait_for_cancellation(self):
         if self._current_task:
             try:
                 # Wait up to 1 second for task to cancel gracefully
-                await asyncio.wait_for(
-                    self._current_task,
-                    timeout=1.0
-                )
+                await asyncio.wait_for(self._current_task, timeout=1.0)
             except (asyncio.TimeoutError, asyncio.CancelledError):
                 # Task didn't cancel in time or was cancelled - that's ok
                 pass
@@ -256,16 +284,69 @@ def _get_final_answer(self, function: Function) -> Any:
             return self._process_data(function._answer)
         return None
 
-
-    def _create_runner_result(self, answer: Any, step_history, error: Optional[str] = None, ) -> RunnerResult:
+    def _create_runner_result(
+        self,
+        answer: Any,
+        step_history,
+        error: Optional[str] = None,
+        id: Optional[str] = None,
+    ) -> RunnerResult:
         """Create a RunnerResult object with the final answer and error."""
         return RunnerResult(
             answer=answer,
             step_history=step_history.copy(),
             error=error,
+            id=id,
             # ctx=self.ctx,
         )
 
-    def _create_execution_complete_stream_event(self, streaming_result: RunnerStreamingResult, final_output_item: FinalOutputItem):
+
+    def _create_trainable_runner_result(
+        self,
+        runner_result: RunnerResult,
+        name: str,
+        previous_output: Parameter,
+        prompt_kwargs: Dict[str, Any],
+        id: Optional[str] = None,
+    ) -> OutputParameter:
+        """Helper function to create a trainable OutputParameter with proper grad_fn setup.
+
+        Args:
+            runner_result: The RunnerResult data
+            name: Name for the parameter
+            previous_output: The predecessor parameter to link to
+            prompt_kwargs: Original prompt arguments for gradient computation
+            id: Optional identifier
+
+        Returns:
+            OutputParameter with grad_fn properly configured
+        """
+        runner_result_parameter = OutputParameter(
+            name=name,
+            data=runner_result,
+            param_type=ParameterType.OUTPUT,
+            requires_opt=True,
+        )
+
+        runner_result_parameter.set_predecessors([previous_output])
+
+        runner_result_parameter.set_grad_fn(
+            BackwardContext(
+                backward_fn=self.backward,
+                backward_engine=self.backward_engine,
+                response=runner_result_parameter,
+                prompt_kwargs=prompt_kwargs,
+                template=getattr(self.agent.planner, 'template', None),
+                prompt_str=self.agent.planner.get_prompt(**prompt_kwargs),
+                disable_backward_engine=self._disable_backward_engine,
+                id=id,
+            )
+        )
+
+        return runner_result_parameter
+
+    def _create_execution_complete_stream_event(
+        self, streaming_result: RunnerStreamingResult, final_output_item: FinalOutputItem
+    ):
         """Complete the streaming execution by adding a sentinel."""
         final_output_event = RunItemStreamEvent(
             name="agent.execution_complete",
@@ -292,8 +373,14 @@ def _add_assistant_response_to_memory(self, final_output_item: FinalOutputItem):
             )
         )
 
-    def create_response_span(self, runner_result, step_count: int, streaming_result: RunnerStreamingResult, runner_span_instance, workflow_status: str = "stream_completed"):
-
+    def create_response_span(
+        self,
+        runner_result,
+        step_count: int,
+        streaming_result: RunnerStreamingResult,
+        runner_span_instance,
+        workflow_status: str = "stream_completed",
+    ):
         runner_span_instance.span_data.update_attributes(
             {
                 "steps_executed": step_count + 1,
@@ -316,15 +403,13 @@ def create_response_span(self, runner_result, step_count: int, streaming_result:
         ):
             pass
 
-
-
-
     async def _process_stream_final_step(
         self,
         answer: Any,
         step_count: int,
         streaming_result,
-        runner_span_instance
+        runner_span_instance,
+        id: Optional[str] = None,
     ) -> FinalOutputItem:
         """Process the final step and trace it."""
         # processed_data = self._get_final_answer(function)
@@ -335,6 +420,7 @@ async def _process_stream_final_step(
         runner_result = self._create_runner_result(
             answer=answer,
             step_history=self.step_history,
+            id=id,
         )
 
         # Update runner span with final results
@@ -404,7 +490,9 @@ def _process_data(
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON in data: {e}")
             if not isinstance(data, dict):
-                raise ValueError(f"Expected dict after JSON parsing, got {type(data)}")
+                raise ValueError(
+                    f"Expected dict after JSON parsing, got {type(data)}"
+                )
             log.info(
                 f"initial answer after being evaluated using json: {data}, type: {type(data)}"
             )
@@ -461,7 +549,7 @@ def call(
         model_kwargs: Optional[Dict[str, Any]] = None,
         use_cache: Optional[bool] = None,
         id: Optional[str] = None,  # global run id
-    ) -> RunnerResult:
+    ) -> Union[RunnerResult, OutputParameter]:
         """Execute the planner synchronously for multiple steps with function calling support.
 
         At the last step the action should be set to "finish" instead which terminates the sequence
@@ -565,6 +653,7 @@ def call(
                         last_output = RunnerResult(
                             answer=processed_data,
                             step_history=self.step_history.copy(),
+                            id=id,
                             # ctx=self.ctx,
                         )
 
@@ -696,8 +785,182 @@ def call(
             answer=current_error or f"No output generated after {step_count} steps (max_steps: {self.max_steps})",
             step_history=self.step_history.copy(),
             error=current_error,
+            id=id,
         )
 
+    # trainable version of Runner call
+    def forward(
+        self,
+        prompt_kwargs: Dict[str, Any],
+        model_kwargs: Optional[Dict[str, Any]] = None,
+        use_cache: Optional[bool] = None,
+        id: Optional[str] = None,
+    ) -> Parameter:
+        """Trainable version of call that chains generator outputs and returns a trainable Parameter.
+
+        Each generator output becomes a trainable predecessor for the next one, and the final
+        result is wrapped in a trainable Parameter.
+        """
+        # Initialize tracking variables
+        self.agent.planner.training = True
+
+        previous_output = None
+        step_count = 0
+        self.step_history = []
+
+        # Set up the initial prompt
+        prompt_kwargs = prompt_kwargs.copy() if prompt_kwargs else {}
+        prompt_kwargs["step_history"] = self.step_history
+
+        if self.use_conversation_memory:
+            self.conversation_memory.reset_pending_query()
+            prompt_kwargs["chat_history_str"] = self.conversation_memory()
+            query_metadata = {"context_str": prompt_kwargs.get("context_str", None)}
+            self.conversation_memory.add_user_query(
+                UserQuery(
+                    query_str=prompt_kwargs.get("input_str", None),
+                    metadata=query_metadata,
+                )
+            )
+
+        prompt_kwargs["max_steps"] = self.max_steps
+        model_kwargs = model_kwargs.copy() if model_kwargs else {}
+
+        # Main training loop
+        while step_count < self.max_steps:
+            try:
+                # Get the next generator output
+                current_output = self.agent.planner.forward(
+                    prompt_kwargs=prompt_kwargs,
+                    model_kwargs=model_kwargs,
+                    # use_cache=use_cache,  # TODO: forward has no use_cache argument
+                    id=id,
+                    # predecessor=previous_output  # Chain to previous output
+                )
+                printc(
+                    f"agent planner prompt call: {self.agent.planner.get_prompt(**prompt_kwargs)}"
+                )
+
+                # If this is the first step, we need to create a trainable parameter
+                trainable_output = current_output
+                trainable_output.name = f"step_{step_count}"
+                trainable_output.requires_opt = True
+                if previous_output is not None:
+                    # Chain to previous trainable output
+                    # TODO: make cleaner
+                    if trainable_output.predecessors is None:
+                        trainable_output.predecessors = set()
+                    trainable_output.predecessors.add(previous_output)
+
+                previous_output = trainable_output
+
+                generator_output = current_output.data
+
+                printc(f"planner output: {generator_output}")
+
+                # Process the function call if needed
+                if hasattr(generator_output, 'data') and isinstance(generator_output.data, Function):
+                    function = generator_output.data
+                    function.id = str(uuid.uuid4())
+
+                    # Handle final step
+                    if self._check_last_step(function):
+                        printc(f"Function answer: {function._answer}")
+                        processed_data = self._process_data(function._answer)
+                        printc(f"Final processed_data: {processed_data}")
+                        runner_result = RunnerResult(
+                            answer=processed_data,
+                            step_history=self.step_history.copy(),
+                            id=id,
+                        )
+                        return self._create_trainable_runner_result(
+                            runner_result=runner_result,
+                            name="Runner Result",
+                            previous_output=previous_output,
+                            prompt_kwargs=prompt_kwargs,
+                            id=id,
+                        )
+
+                        # return self.combine_step_history_and_runner_result(
+                        #     step_history=self.step_history,
+                        #     runner_result=runner_result,
+                        #     id=id,
+                        #     task_desc=self.agent.planner.prompt_kwargs["task_desc"],
+                        # )
+
+                    # Execute the function and update context
+                    function_results = self._tool_execute_sync(function)
+                    step_output = StepOutput(
+                        step=step_count,
+                        action=function,
+                        function=function,
+                        observation=function_results.output.observation if hasattr(function_results.output, 'observation') else function_results.output,
+                    )
+
+                    self.step_history.append(step_output)
+                    prompt_kwargs["step_history"] = self.step_history
+
+                # Update for next iteration
+                step_count += 1
+
+            except Exception as e:
+                error_msg = f"Error in step {step_count}: {str(e)}"
+                log.error(error_msg)
+                runner_result = RunnerResult(
+                    answer=error_msg,
+                    step_history=self.step_history.copy(),
+                    error=error_msg,
+                    id=id,
+                )
+                return self._create_trainable_runner_result(
+                    runner_result=runner_result,
+                    name="Runner Error",
+                    previous_output=previous_output,
+                    prompt_kwargs=prompt_kwargs,
+                    id=id,
+                )
+                # return self.combine_step_history_and_runner_result(
+                #     step_history=self.step_history,
+                #     runner_result=runner_result,
+                #     id=id,
+                #     task_desc=self.agent.planner.prompt_kwargs["task_desc"],
+                # )
+
+        # If we exit the loop without a final result
+        runner_result = RunnerResult(
+            answer=f"Max steps ({self.max_steps}) reached without completion",
+            step_history=self.step_history.copy(),
+            error="Max steps reached",
+            id=id,
+        )
+
+        runner_result_parameter = self._create_trainable_runner_result(
+            runner_result=runner_result,
+            name="Runner Incomplete",
+            previous_output=previous_output,
+            prompt_kwargs=prompt_kwargs,
+            id=id,
+        )
+
+        return runner_result_parameter
+
+        # return self.combine_step_history_and_runner_result(
+        #     step_history=step_history,
+        #     runner_result=runner_result,
+        #     id=id,
+        #     task_desc=self.agent.planner.prompt_kwargs["task_desc"],
+        # )
+
+    def __call__(self, *args, **kwargs) -> Union[Parameter, RunnerResult]:
+        if self.training:
+            log.debug("Training mode")
+            return self.forward(*args, **kwargs)
+        else:
+            log.debug("Inference mode")
+            return self.call(*args, **kwargs)
+
     def _tool_execute_sync(
         self,
         func: Function,
@@ -852,6 +1115,7 @@ async def acall(
                         last_output = RunnerResult(
                             answer=processed_data,
                             step_history=self.step_history.copy(),
+                            id=id,
                             # ctx=self.ctx,
                         )
 
@@ -989,6 +1253,7 @@ async def acall(
             answer=current_error or f"No output generated after {step_count} steps (max_steps: {self.max_steps})",
             step_history=self.step_history.copy(),
             error=current_error,
+            id=id,
         )
 
     def astream(
@@ -1226,6 +1491,7 @@ async def impl_astream(
                             step_count=step_count,
                             streaming_result=streaming_result,
                             runner_span_instance=runner_span_instance,
+                            id=id,
                         )
                         stop_the_loop = True
                         workflow_status = "stream_completed"
@@ -1375,6 +1641,7 @@ async def impl_astream(
                 answer=f"No output generated after {step_count} steps (max_steps: {self.max_steps})",
                 error=current_error,
                 step_history=self.step_history.copy(),
+                id=id,
             )
 
             final_output_item = FinalOutputItem(data=runner_result)
@@ -1397,6 +1664,7 @@ async def impl_astream(
                 answer=final_output_item.data.answer if final_output_item.data else None,
                 step_history=self.step_history.copy(),
                 error=current_error,
+                id=id,
             )
 
             self._create_execution_complete_stream_event(
diff --git a/adalflow/adalflow/components/agent/runner_trainer.py b/adalflow/adalflow/components/agent/runner_trainer.py
new file mode 100644
index 000000000..12f41be43
--- /dev/null
+++ b/adalflow/adalflow/components/agent/runner_trainer.py
@@ -0,0 +1,219 @@
+from typing import Any, Callable, Dict, Optional, Tuple, List
+import adalflow as adal
+from adalflow.datasets.types import GSM8KData as Example
+from adalflow.datasets.gsm8k import GSM8K
+from adalflow.eval.answer_match_acc import AnswerMatchAcc
+from adalflow.core.types import RunnerResult
+from adalflow.optim.optimizer import Optimizer
+from adalflow.components.agent.react import ReActAgent
+from adalflow.optim.parameter import Parameter, ParameterType
+from adalflow.core.func_tool import FunctionTool
+from adalflow.utils import printc
+
+
+def load_datasets():
+    train_data = GSM8K(split="train", size=100)
+    val_data = GSM8K(split="val", size=50)
+    test_data = GSM8K(split="test", size=100)
+    return train_data, val_data, test_data
+
+
+def default_eval_fn(y: str, y_gt: str) -> float:
+    """Default evaluation function that performs an exact string match."""
+    return 1.0 if str(y).strip() == str(y_gt).strip() else 0.0
+
+
+def default_loss_fn_factory(eval_fn: Callable = None):
+    """Factory function to create the default loss function."""
+    if eval_fn is None:
+        eval_fn = default_eval_fn
+    return adal.EvalFnToTextLoss(
+        eval_fn=eval_fn,
+        eval_fn_desc="Default evaluation: 1 if str(y) == str(y_gt) else 0",
+    )
+
+
+class RunnerTrainer(adal.AdalComponent):
+    """Generic trainer that accepts a Runner task and allows custom eval_fn and loss_fn."""
+
+    def __init__(
+        self,
+        runner: adal.Component,
+        eval_fn: Optional[Callable] = None,
+        loss_fn: Optional[Callable] = None,
+        backward_engine_model_config: Optional[Dict] = None,
+        teacher_model_config: Optional[Dict] = None,
+        text_optimizer_model_config: Optional[Dict] = None,
+        original_react_agent: bool = False,
+    ):
+        # Use provided eval_fn or create default
+        if eval_fn is None:
+            eval_fn = default_eval_fn
+
+        # Use provided loss_fn or create default
+        if loss_fn is None:
+            loss_fn = default_loss_fn_factory(eval_fn)
+
+        super().__init__(task=runner, eval_fn=eval_fn, loss_fn=loss_fn)
+
+        self.backward_engine_model_config = backward_engine_model_config
+        self.teacher_model_config = teacher_model_config
+        self.text_optimizer_model_config = text_optimizer_model_config
+        self.original_react_agent = original_react_agent
+
+    def prepare_task(self, sample: Example) -> Tuple[Callable, Dict[str, Any]]:
+        # Runner.call expects a prompt_kwargs parameter, not direct keyword args.
+        # prepare_task should not call forward directly; __call__ dispatches to
+        # forward in training mode.
+        # return self.task.forward, {"prompt_kwargs": {"input_str": sample.question}, "id": sample.id}
+        if self.original_react_agent:
+            return self.task.__call__, {"input": sample.question, "id": sample.id}
+        return self.task.__call__, {"prompt_kwargs": {"input_str": sample.question}, "id": sample.id}
+
+    def prepare_eval(
+        self, sample: Example, y_pred: RunnerResult
+    ) -> Tuple[float, Dict[str, Any]]:
+        y_label = ""
+
+        if not self.original_react_agent and y_pred is not None and y_pred.answer is not None:
+            y_label = y_pred.answer
+        elif self.original_react_agent:
+            y_label = y_pred
+        return self.eval_fn, {"y": y_label, "y_gt": sample.answer}
+
+    def prepare_loss(
+        self, sample: Example, pred: adal.Parameter
+    ) -> Tuple[Callable, Dict[str, Any]]:
+        print("pred", pred)
+        y_gt = adal.Parameter(
+            name="y_gt",
+            data=sample.answer,
+            eval_input=sample.answer,
+            requires_opt=False,
+        )
+
+        if self.original_react_agent:
+            pred.eval_input = pred.data
+        else:
+            pred.eval_input = pred.data.answer
+
+        return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}, "id": sample.id}
+
+
+def train_original_react():
+    from adalflow.utils import setup_env
+
+    setup_env()
+    train_data, val_data, test_data = load_datasets()
+
+    # Create an Anthropic client configuration instead of the GPT-3.5 one
+    anthropic_config = {
+        "model_client": adal.AnthropicAPIClient(),
+        "model_kwargs": {
+            "model": "claude-3-5-sonnet-20241022",
+            # "max_tokens": 2000,
+            "temperature": 0.0,
+        }
+    }
+
+    # LLM used as a tool, sharing the same Anthropic config
+    llm_tool = adal.Generator(**anthropic_config)
+
+    def llm_as_tool(input: str, id: Optional[str] = None) -> str:
+        """Used as a calculator tool."""
+        printc(f"llm_as_tool: {input}", color="yellow")
+
+        return llm_tool(prompt_kwargs={"input_str": input}, id=id)
+
+    react_agent = ReActAgent(
+        tools=[FunctionTool(llm_as_tool)],
+        max_steps=6,
+        add_llm_as_fallback=True,
+        **anthropic_config
+    )
+
+    # Create specific eval_fn and loss_fn like in the GSM8K train.py
+    eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
+    loss_fn = adal.EvalFnToTextLoss(
+        eval_fn=eval_fn,
+        eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
+    )
+
+    adal_component = RunnerTrainer(
+        runner=react_agent,
+        eval_fn=eval_fn,
+        loss_fn=loss_fn,
+        backward_engine_model_config=anthropic_config,
+        text_optimizer_model_config=anthropic_config,
+        original_react_agent=True,
+    )
+    trainer = adal.Trainer(
+        adaltask=adal_component,
+        strategy="random",
+        max_steps=10,
+        text_optimizers_config_kwargs={"max_past_history": 5},
+    )
+    trainer.fit(
+        train_dataset=train_data,
+        val_dataset=val_data,
+        test_dataset=test_data,
+        debug=False,
+    )
+
+
+def train():
+    from adalflow.utils import setup_env
+
+    setup_env()
+    train_data, val_data, test_data = load_datasets()
+
+    # Create an Anthropic client configuration instead of the GPT-3.5 one
+    anthropic_config = {
+        "model_client": adal.AnthropicAPIClient(),
+        "model_kwargs": {
+            "model": "claude-3-5-sonnet-20241022",
+            # "max_tokens": 2000,
+            "temperature": 0.0,
+        }
+    }
+
+    # Create the Agent first, then the Runner
+    agent = adal.Agent(
+        name="gsm8k_agent",
+        add_llm_as_fallback=True,
+        max_steps=6,
+        **anthropic_config
+    )
+
+    runner = adal.Runner(agent=agent, training=True)
+
+    # Create specific eval_fn and loss_fn like in the GSM8K train.py
+    eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
+    loss_fn = adal.EvalFnToTextLoss(
+        eval_fn=eval_fn,
+        eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
+    )
+
+    adal_component = RunnerTrainer(
+        runner=runner,
+        eval_fn=eval_fn,
+        loss_fn=loss_fn,
+        backward_engine_model_config=anthropic_config,
+        text_optimizer_model_config=anthropic_config,
+    )
+    trainer = adal.Trainer(
+        adaltask=adal_component,
+        strategy="random",
+        max_steps=10,
+        # resume_from_ckpt="./Users/jinhakim/.adalflow/ckpt/RunnerTrainer/random_max_steps_5_2bd11_run_1.json",
+        text_optimizers_config_kwargs={"max_past_history": 5},
+    )
+    trainer.fit(
+        train_dataset=train_data,
+        val_dataset=val_data,
+        test_dataset=test_data,
+        debug=False,
+        # resume_from_ckpt="./Users/jinhakim/.adalflow/ckpt/RunnerTrainer/random_max_steps_2_cc5fb_run_1.json",
+    )
+
+    # feedback, visualization
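+
+
+def example_single_query():
+    """Minimal usage sketch (illustrative; not part of the original training
+    pipeline): run a single question through a Runner in inference mode and
+    read the answer. Assumes an ANTHROPIC_API_KEY is available to setup_env();
+    the agent name and question are placeholders.
+    """
+    from adalflow.utils import setup_env
+
+    setup_env()
+    agent = adal.Agent(
+        name="example_agent",
+        add_llm_as_fallback=True,
+        max_steps=4,
+        model_client=adal.AnthropicAPIClient(),
+        model_kwargs={"model": "claude-3-5-sonnet-20241022", "temperature": 0.0},
+    )
+    # training=False makes Runner.__call__ dispatch to call() (inference)
+    # rather than the trainable forward() path
+    runner = adal.Runner(agent=agent, training=False)
+    result = runner.call(prompt_kwargs={"input_str": "What is 7 * 8?"}, id="example-1")
+    print(result.answer)
+
+
+if __name__ == "__main__":
+    train()
+    # train_original_react()
\ No newline at end of file
diff --git a/adalflow/adalflow/core/functional.py b/adalflow/adalflow/core/functional.py
index 426a54231..9132ce311 100644
--- a/adalflow/adalflow/core/functional.py
+++ b/adalflow/adalflow/core/functional.py
@@ -84,8 +84,17 @@ def _asdict_inner(obj, dict_factory, exclude):
             )
             for k, v in obj.items()
         )
+    elif isinstance(obj, set):
+        # Handle sets specifically - preserve them as sets
+        return type(obj)(_asdict_inner(v, dict_factory, exclude) for v in obj)
     else:
-        return obj
+        # Check if the object is JSON serializable
+        try:
+            json.dumps(obj)
+            return obj
+        except Exception:
+            # If not JSON serializable, convert to string representation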
+            return str(obj)
 
         # return deepcopy(obj)
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
index f2edc205f..f4bbbb2a5 100644
--- a/adalflow/adalflow/core/generator.py
+++ b/adalflow/adalflow/core/generator.py
@@ -162,6 +162,7 @@ def __init__(
 
         CachedEngine.__init__(self, cache_path=self.cache_path)
         Component.__init__(self)
+        # creates a backward engine if none is passed
         GradComponent.__init__(self, desc="Generate a response using LLM model.")
         CallbackManager.__init__(self)
 
@@ -739,7 +740,7 @@ def data_to_prompt_map_fn(data: Parameter) -> str:
         # **** end of the special to the generator ****
 
         # if not self.backward_engine:
-        #     # self.set_backward_engine()
+        #     self.set_backward_engine()
         #     log.debug(f"Backward engine: {self.backward_engine}")
 
         # attach a funtion to compute gradient for predecessors
@@ -778,7 +779,7 @@ def backward(
         log.debug(
             f"backward pass setup: {backward_pass_setup}, name: {self.name}",
             color="red",
-        )
+        )
 
         children_params = response.predecessors
         is_intermediate_node = True
diff --git a/adalflow/adalflow/core/types.py b/adalflow/adalflow/core/types.py
index 024b3c691..4e81bb66f 100644
--- a/adalflow/adalflow/core/types.py
+++ b/adalflow/adalflow/core/types.py
@@ -1436,6 +1436,10 @@ class RunnerResult:
         metadata={"desc": "The context of the execution"},
         default=None,
     )
+    id: Optional[str] = field(
+        metadata={"desc": "The id of the execution"},
+        default=None,
+    )
 
 
 @dataclass
diff --git a/adalflow/adalflow/datasets/hotpot_qa.py b/adalflow/adalflow/datasets/hotpot_qa.py
index afec6cff1..c809ee72e 100644
--- a/adalflow/adalflow/datasets/hotpot_qa.py
+++ b/adalflow/adalflow/datasets/hotpot_qa.py
@@ -11,7 +11,6 @@
 from adalflow.core.base_data_class import DataClass
 from adalflow.datasets.types import HotPotQAData
 
-
 class HotPotQA(Dataset):
     def __init__(
         self,
diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py
index 7fb68ddd4..1275a3185 100644
--- a/adalflow/adalflow/optim/parameter.py
+++ b/adalflow/adalflow/optim/parameter.py
@@ -865,6 +865,8 @@ def draw_interactive_html_graph(
 
         # Add nodes to the graph
         node_ids = set()
+        if nodes is None:
+            nodes = []
         for node in nodes:
             self.generate_node_html(node, output_dir=filepath)
diff --git a/adalflow/tests/test_runner.py b/adalflow/tests/test_runner.py
index 39f24051a..58d457aaf 100644
--- a/adalflow/tests/test_runner.py
+++ b/adalflow/tests/test_runner.py
@@ -816,6 +816,329 @@ def test_conversation_memory_clear_streaming(self):
         asyncio.run(self._test_conversation_memory_clear_streaming())
 
 
+class TestRunnerForward(unittest.TestCase):
+    """Tests for the Runner.forward method (trainable mode)."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.runner = Runner(
+            agent=DummyAgent(
+                planner=None, answer_data_type=None, tool_manager=MockToolManager()
+            )
+        )
+
+    def test_forward_single_step_final_answer(self):
+        """Test forward method with single step that produces final answer."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="forward_answer")
+
+        class TrainablePlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                # Return a Parameter wrapping GeneratorOutput
+                param = Parameter(
+                    name="planner_output",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=TrainablePlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with RunnerResult
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertEqual(result.data.answer, "forward_answer")
+        self.assertTrue(result.requires_opt)
+
+    def test_forward_multi_step_execution(self):
+        """Test forward method with multiple steps before final answer."""
+        from adalflow.optim.parameter import Parameter
+
+        fn1 = DummyFunction(name="search", _is_answer_final=False)
+        fn2 = DummyFunction(name="answer_output", _is_answer_final=True, _answer="multi_step_answer")
+
+        class MultiStepTrainablePlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                if self.call_count == 1:
+                    data = GeneratorOutput(data=fn1)
+                else:
+                    data = GeneratorOutput(data=fn2)
+
+                param = Parameter(
+                    name=f"planner_output_{self.call_count}",
+                    data=data,
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=MultiStepTrainablePlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with RunnerResult
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertEqual(result.data.answer, "multi_step_answer")
+        self.assertEqual(len(runner.step_history), 1)  # Only non-final step in history
+
+    def test_forward_parameter_chaining(self):
+        """Test that forward method properly chains parameters."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="chained_answer")
+
+        class ChainingPlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                param = Parameter(
+                    name=f"step_{self.call_count}",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=ChainingPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+
+        # Should have gradient function set up
+        self.assertIsNotNone(result.grad_fn)
+        self.assertTrue(result.requires_opt)
+
+    def test_forward_error_handling(self):
+        """Test forward method handles errors gracefully."""
+        from adalflow.optim.parameter import Parameter
+
+        class ErrorPlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                raise ValueError("Planning error")
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=ErrorPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with error in RunnerResult
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertIn("Error in step 0", result.data.answer)
+        self.assertIsNotNone(result.data.error)
+
+    def test_forward_max_steps_reached(self):
+        """Test forward method when max steps are reached without final answer."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="continue", _is_answer_final=False)
+
+        class ContinuousPlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                param = Parameter(
+                    name=f"step_{self.call_count}",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=ContinuousPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager(),
+            max_steps=2
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # Should return OutputParameter with incomplete message
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertIsInstance(result.data, RunnerResult)
+        self.assertIn("Max steps (2) reached", result.data.answer)
+        self.assertEqual(len(runner.step_history), 2)
+
+    def test_forward_training_mode_flag(self):
+        """Test that forward method sets training mode on planner."""
+        from adalflow.optim.parameter import Parameter
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="training_test")
+
+        class TrainingAwarePlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                # Should be called in training mode
+                assert self.training == True, "Planner should be in training mode"
+                param = Parameter(
+                    name="planner_output",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=TrainingAwarePlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        # This should set planner.training = True and not raise assertion error
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+
+    def test_forward_with_conversation_memory(self):
+        """Test forward method with conversation memory."""
+        from adalflow.optim.parameter import Parameter
+        from adalflow.components.memory.memory import ConversationMemory
+
+        fn = DummyFunction(name="answer_output", _is_answer_final=True, _answer="memory_test")
+        captured_prompt_kwargs = []
+
+        class MemoryPlanner:
+            def __init__(self):
+                self.training = False
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                captured_prompt_kwargs.append(prompt_kwargs.copy())
+                param = Parameter(
+                    name="planner_output",
+                    data=GeneratorOutput(data=fn),
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        memory = ConversationMemory()
+        agent = DummyAgent(
+            planner=MemoryPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent, conversation_memory=memory)
+
+        result = runner.forward(prompt_kwargs={"input_str": "Hello"})
+
+        # Should have chat_history_str in prompt_kwargs
+        self.assertEqual(len(captured_prompt_kwargs), 1)
+        self.assertIn("chat_history_str", captured_prompt_kwargs[0])
+        self.assertEqual(captured_prompt_kwargs[0]["chat_history_str"], "")  # Empty on first call
+
+    def test_forward_predecessor_chaining(self):
+        """Test that forward method properly chains predecessors across steps."""
+        from adalflow.optim.parameter import Parameter
+
+        fn1 = DummyFunction(name="search", _is_answer_final=False)
+        fn2 = DummyFunction(name="answer_output", _is_answer_final=True, _answer="predecessor_test")
+
+        class PredecessorPlanner:
+            def __init__(self):
+                self.training = False
+                self.call_count = 0
+
+            def forward(self, *, prompt_kwargs, model_kwargs=None, id=None):
+                self.call_count += 1
+                if self.call_count == 1:
+                    data = GeneratorOutput(data=fn1)
+                else:
+                    data = GeneratorOutput(data=fn2)
+
+                param = Parameter(
+                    name=f"step_{self.call_count}",
+                    data=data,
+                    requires_opt=True,
+                )
+                return param
+
+            def get_prompt(self, **kwargs):
+                return "test prompt"
+
+        agent = DummyAgent(
+            planner=PredecessorPlanner(),
+            answer_data_type=None,
+            tool_manager=MockToolManager()
+        )
+        runner = Runner(agent=agent)
+
+        result = runner.forward(prompt_kwargs={"input_str": "test"})
+
+        # The final result should have predecessors set up
+        from adalflow.optim.parameter import OutputParameter
+        self.assertIsInstance(result, OutputParameter)
+        self.assertTrue(len(result.predecessors) > 0)  # Should have predecessor
+
+
 class TestRunnerBugFixes(unittest.TestCase):
     """Tests for specific bugs that were found and fixed in the runner implementation."""
diff --git a/adalflow/tests/test_runner_trainer.py b/adalflow/tests/test_runner_trainer.py
new file mode 100644
index 000000000..75f3381a0
--- /dev/null
+++ b/adalflow/tests/test_runner_trainer.py
@@ -0,0 +1,650 @@
+import unittest
+import unittest.mock
+from unittest.mock import Mock, MagicMock, patch
+from types import SimpleNamespace
+from typing import Dict, Any, Tuple, Callable
+
+import adalflow as adal
+from adalflow.components.agent.runner_trainer import (
+    RunnerTrainer,
+    load_datasets,
+    default_eval_fn,
+    default_loss_fn_factory,
+)
+from adalflow.datasets.types import GSM8KData as Example
+from adalflow.core.types import RunnerResult
+from adalflow.optim.parameter import Parameter
+from adalflow.components.agent.react import ReActAgent
+
+
+class TestRunnerTrainer(unittest.TestCase):
+    """Tests for the RunnerTrainer class."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        # Create mock runner
+        self.mock_runner = Mock()
+        self.mock_runner.__class__ = adal.Component
+
+        # Create test sample
+        self.test_sample = Example(
+            id="test_1",
+            question="What is 2 + 2?",
+            answer="4"
+        )
+
+    def test_init_with_defaults(self):
+        """Test RunnerTrainer initialization with default parameters."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Check that defaults are set
+        self.assertEqual(trainer.task, self.mock_runner)
+        self.assertIsNotNone(trainer.eval_fn)
+        self.assertIsNotNone(trainer.loss_fn)
+        self.assertIsNone(trainer.backward_engine_model_config)
+        self.assertIsNone(trainer.teacher_model_config)
+        self.assertIsNone(trainer.text_optimizer_model_config)
+        self.assertFalse(trainer.original_react_agent)
+
+    def test_init_with_custom_parameters(self):
+        """Test RunnerTrainer initialization with custom parameters."""
+        custom_eval_fn = lambda y, y_gt: 0.8
+        custom_loss_fn = Mock()
+        backward_config = {"model": "test"}
+        teacher_config = {"teacher": "test"}
+        text_optimizer_config = {"optimizer": "test"}
+
+        trainer = RunnerTrainer(
+            runner=self.mock_runner,
+            eval_fn=custom_eval_fn,
+            loss_fn=custom_loss_fn,
+            backward_engine_model_config=backward_config,
+            teacher_model_config=teacher_config,
+            text_optimizer_model_config=text_optimizer_config,
+            original_react_agent=True
+        )
+
+        self.assertEqual(trainer.eval_fn, custom_eval_fn)
+        self.assertEqual(trainer.loss_fn, custom_loss_fn)
+        self.assertEqual(trainer.backward_engine_model_config, backward_config)
+        self.assertEqual(trainer.teacher_model_config, teacher_config)
+        self.assertEqual(trainer.text_optimizer_model_config, text_optimizer_config)
+        self.assertTrue(trainer.original_react_agent)
+
+    def test_prepare_task_standard_runner(self):
+        """Test prepare_task method for standard Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        task_fn, kwargs = trainer.prepare_task(self.test_sample)
+
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+        self.assertIn("prompt_kwargs", kwargs)
+        self.assertEqual(kwargs["prompt_kwargs"]["input_str"], "What is 2 + 2?")
+        self.assertEqual(kwargs["id"], "test_1")
+
+    def test_prepare_task_original_react_agent(self):
+        """Test prepare_task method for original ReActAgent."""
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        task_fn, kwargs = trainer.prepare_task(self.test_sample)
+
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+        self.assertIn("input", kwargs)
+        self.assertEqual(kwargs["input"], "What is 2 + 2?")
+        self.assertEqual(kwargs["id"], "test_1")
+
+    def test_prepare_eval_standard_runner(self):
+        """Test prepare_eval method for standard Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create mock RunnerResult
+        mock_result = RunnerResult(answer="4", step_history=[])
+
+        eval_fn, kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(kwargs["y"], "4")
+        self.assertEqual(kwargs["y_gt"], "4")
+
+    def test_prepare_eval_original_react_agent(self):
+        """Test prepare_eval method for original ReActAgent."""
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        prediction = "4"
+
+        eval_fn, kwargs = trainer.prepare_eval(self.test_sample, prediction)
+
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(kwargs["y"], "4")
+        self.assertEqual(kwargs["y_gt"], "4")
+
+    def test_prepare_eval_none_result(self):
+        """Test prepare_eval method with None result."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        eval_fn, kwargs = trainer.prepare_eval(self.test_sample, None)
+
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(kwargs["y"], "")
+        self.assertEqual(kwargs["y_gt"], "4")
+
+    def test_prepare_loss_standard_runner(self):
+        """Test prepare_loss method for standard Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create mock prediction parameter with RunnerResult
+        mock_result = RunnerResult(answer="4", step_history=[])
+        mock_pred = Parameter(
+            name="pred",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        loss_fn, kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        self.assertEqual(loss_fn, trainer.loss_fn)
+        self.assertIn("kwargs", kwargs)
+        self.assertIn("y", kwargs["kwargs"])
+        self.assertIn("y_gt", kwargs["kwargs"])
+        self.assertEqual(kwargs["id"], "test_1")
+
+        # Check that pred.eval_input is set correctly
+        self.assertEqual(mock_pred.eval_input, "4")
+
+        # Check y_gt parameter
+        y_gt = kwargs["kwargs"]["y_gt"]
+        self.assertIsInstance(y_gt, Parameter)
+        self.assertEqual(y_gt.data, "4")
+        self.assertFalse(y_gt.requires_opt)
+
+    def test_prepare_loss_original_react_agent(self):
+        """Test prepare_loss method for original ReActAgent."""
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        # Create mock prediction parameter with string data
+        mock_pred = Parameter(
+            name="pred",
+            data="4",
+            requires_opt=True
+        )
+
+        loss_fn, kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        self.assertEqual(loss_fn, trainer.loss_fn)
+
+        # Check that pred.eval_input is set directly to data
+        self.assertEqual(mock_pred.eval_input, "4")
+
+    @patch('builtins.print')  # Mock print to avoid output during tests
+    def test_prepare_loss_prints_pred(self, mock_print):
+        """Test that prepare_loss prints the prediction parameter."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        mock_result = RunnerResult(answer="4", step_history=[])
+        mock_pred = Parameter(
+            name="pred",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        trainer.prepare_loss(self.test_sample, mock_pred)
+
+        # Verify print was called with the prediction
+        mock_print.assert_called_once_with("pred", mock_pred)
+
+    def test_inheritance_from_adal_component(self):
+        """Test that RunnerTrainer properly inherits from AdalComponent."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        self.assertIsInstance(trainer, adal.AdalComponent)
+        self.assertTrue(hasattr(trainer, 'task'))
+        self.assertTrue(hasattr(trainer, 'eval_fn'))
+        self.assertTrue(hasattr(trainer, 'loss_fn'))
+
+
+class TestRunnerTrainerHelperFunctions(unittest.TestCase):
+    """Tests for helper functions in runner_trainer module."""
+
+    def test_default_eval_fn_exact_match(self):
+        """Test default_eval_fn with exact matches."""
+        result = default_eval_fn("hello", "hello")
+        self.assertEqual(result, 1.0)
+
+        result = default_eval_fn(" hello ", "hello")
+        self.assertEqual(result, 1.0)  # Should strip whitespace
+
+        result = default_eval_fn(42, "42")
+        self.assertEqual(result, 1.0)  # Should convert to string
+
+    def test_default_eval_fn_no_match(self):
+        """Test default_eval_fn with non-matches."""
+        result = default_eval_fn("hello", "world")
+        self.assertEqual(result, 0.0)
+
+        result = default_eval_fn("42", "43")
+        self.assertEqual(result, 0.0)
+
+    def test_default_loss_fn_factory_with_default_eval(self):
+        """Test default_loss_fn_factory with default eval function."""
+        loss_fn = default_loss_fn_factory()
+
+        self.assertIsInstance(loss_fn, adal.EvalFnToTextLoss)
+        # Check that it has the expected description
+        self.assertIn("Default evaluation", loss_fn.eval_fn_desc)
+
+    def test_default_loss_fn_factory_with_custom_eval(self):
+        """Test default_loss_fn_factory with custom eval function."""
+        custom_eval = lambda y, y_gt: 0.5
+        loss_fn = default_loss_fn_factory(custom_eval)
+
+        self.assertIsInstance(loss_fn, adal.EvalFnToTextLoss)
+        # The eval_fn should be wrapped in the loss function
+        self.assertIn("Default evaluation", loss_fn.eval_fn_desc)
+
+    @patch('adalflow.components.agent.runner_trainer.GSM8K')
+    def test_load_datasets(self, mock_gsm8k_class):
+        """Test load_datasets function."""
+        # Mock the GSM8K constructor
+        mock_train = Mock()
+        mock_val = Mock()
+        mock_test = Mock()
+
+        def mock_gsm8k_side_effect(split, size):
+            if split == "train":
+                return mock_train
+            elif split == "val":
+                return mock_val
+            elif split == "test":
+                return mock_test
+
+        mock_gsm8k_class.side_effect = mock_gsm8k_side_effect
+
+        train_data, val_data, test_data = load_datasets()
+
+        # Check that GSM8K was called with correct parameters
+        expected_calls = [
+            unittest.mock.call(split="train", size=100),
+            unittest.mock.call(split="val", size=50),
+            unittest.mock.call(split="test", size=100)
+        ]
+        mock_gsm8k_class.assert_has_calls(expected_calls)
+
+        # Check return values
+        self.assertEqual(train_data, mock_train)
+        self.assertEqual(val_data, mock_val)
+        self.assertEqual(test_data, mock_test)
+
+
+class TestRunnerTrainerIntegration(unittest.TestCase):
+    """Integration tests for RunnerTrainer with mocked dependencies."""
+
+    def setUp(self):
+        """Set up integration test fixtures."""
+        self.mock_runner = Mock(spec=adal.Component)
+        self.test_sample = Example(
+            id="integration_test",
+            question="What is the capital of France?",
+            answer="Paris"
+        )
+
+    def test_full_workflow_standard_runner(self):
+        """Test complete workflow with standard Runner."""
+        # Create trainer
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test prepare_task
+        task_fn, task_kwargs = trainer.prepare_task(self.test_sample)
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+
+        # Simulate runner execution
+        mock_result = RunnerResult(answer="Paris", step_history=[])
+
+        # Test prepare_eval
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Execute evaluation
+        eval_score = eval_fn(**eval_kwargs)
+        self.assertEqual(eval_score, 1.0)  # Should match exactly
+
+        # Test prepare_loss with Parameter
+        mock_pred = Parameter(
+            name="prediction",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        loss_fn, loss_kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        # Verify loss preparation
+        self.assertIn("kwargs", loss_kwargs)
+        self.assertEqual(loss_kwargs["id"], "integration_test")
+
+    def test_full_workflow_original_react_agent(self):
+        """Test complete workflow with original ReActAgent."""
+        # Create trainer for original ReActAgent
+        trainer = RunnerTrainer(runner=self.mock_runner, original_react_agent=True)
+
+        # Test prepare_task
+        task_fn, task_kwargs = trainer.prepare_task(self.test_sample)
+        self.assertIn("input", task_kwargs)
+        self.assertEqual(task_kwargs["input"], "What is the capital of France?")
+
+        # Test prepare_eval with string prediction
+        prediction = "Paris"
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, prediction)
+
+        # Execute evaluation
+        eval_score = eval_fn(**eval_kwargs)
+        self.assertEqual(eval_score, 1.0)
+
+        # Test prepare_loss
+        mock_pred = Parameter(
+            name="prediction",
+            data="Paris",
+            requires_opt=True
+        )
+
+        loss_fn, loss_kwargs = trainer.prepare_loss(self.test_sample, mock_pred)
+
+        # Verify eval_input is set directly to data
+        self.assertEqual(mock_pred.eval_input, "Paris")
+
+    def test_error_handling_in_eval(self):
+        """Test error handling in evaluation methods."""
+        # Create trainer with faulty eval function
+        def faulty_eval(y, y_gt):
+            raise ValueError("Evaluation error")
+
+        trainer = RunnerTrainer(runner=self.mock_runner, eval_fn=faulty_eval)
+
+        mock_result = RunnerResult(answer="Test", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should propagate the error
+        with self.assertRaises(ValueError):
+            eval_fn(**eval_kwargs)
+
+    def test_custom_configurations(self):
+        """Test that custom configurations are preserved."""
+        backward_config = {"model_client": "test_backward"}
+        teacher_config = {"model_client": "test_teacher"}
+        text_optimizer_config = {"model_client": "test_optimizer"}
+
+        trainer = RunnerTrainer(
+            runner=self.mock_runner,
+            backward_engine_model_config=backward_config,
+            teacher_model_config=teacher_config,
+            text_optimizer_model_config=text_optimizer_config
+        )
+
+        self.assertEqual(trainer.backward_engine_model_config, backward_config)
+        self.assertEqual(trainer.teacher_model_config, teacher_config)
+        self.assertEqual(trainer.text_optimizer_model_config, text_optimizer_config)
+
+
+class TestRunnerTrainerEdgeCases(unittest.TestCase):
+    """Tests for edge cases and error conditions."""
+
+    def setUp(self):
+        """Set up edge case test fixtures."""
+        self.mock_runner = Mock(spec=adal.Component)
+
+    def test_empty_answer(self):
+        """Test handling of empty answers."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        empty_sample = Example(
+            id="empty_test",
+            question="Test question?",
+            answer=""
+        )
+
+        # Test with empty RunnerResult answer
+        mock_result = RunnerResult(answer="", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(empty_sample, mock_result)
+
+        # Should match empty string
+        result = eval_fn(**eval_kwargs)
+        self.assertEqual(result, 1.0)
+
+    def test_none_runner_result_answer(self):
+        """Test handling of RunnerResult with None answer."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        test_sample = Example(
+            id="none_test",
+            question="Test question?",
+            answer="expected"
+        )
+
+        # Test with RunnerResult having None answer
+        mock_result = RunnerResult(answer=None, step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(test_sample, mock_result)
+
+        # Should use empty string for None answer
+        self.assertEqual(eval_kwargs["y"], "")
+
+    def test_special_characters_in_answers(self):
+        """Test handling of special characters in answers."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        special_sample = Example(
+            id="special_test",
+            question="What is the result?",
+            answer="$100.50"
+        )
+
+        mock_result = RunnerResult(answer="$100.50", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(special_sample, mock_result)
+
+        result = eval_fn(**eval_kwargs)
+        self.assertEqual(result, 1.0)
+
+    def test_whitespace_normalization(self):
+        """Test that whitespace is properly normalized in evaluation."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        test_sample = Example(
+            id="whitespace_test",
+            question="Test?",
+            answer="answer"
+        )
+
+        # Test with extra whitespace
+        mock_result = RunnerResult(answer="  answer  ", step_history=[])
+        eval_fn, eval_kwargs = trainer.prepare_eval(test_sample, mock_result)
+
+        result = eval_fn(**eval_kwargs)
+        self.assertEqual(result, 1.0)  # Should match after stripping
+
+
+class TestRunnerTrainerNewRunnerFeatures(unittest.TestCase):
+    """Tests for RunnerTrainer compatibility with new Runner features."""
+
+    def setUp(self):
+        """Set up test fixtures for new Runner features."""
+        self.mock_runner = Mock(spec=adal.Component)
+        self.test_sample = Example(
+            id="new_feature_test",
+            question="Test with new runner?",
+            answer="New runner works"
+        )
+
+    def test_runner_trainer_with_training_flag(self):
+        """Test RunnerTrainer works with Runner that has training parameter."""
+        # Create a mock runner that simulates training behavior
+        mock_runner = Mock(spec=adal.Component)
+        mock_runner.training = False
+
+        trainer = RunnerTrainer(runner=mock_runner)
+
+        # Test that trainer initialization works with training-capable runners
+        self.assertEqual(trainer.task, mock_runner)
+        self.assertIsNotNone(trainer.eval_fn)
+        self.assertIsNotNone(trainer.loss_fn)
+
+    def test_runner_result_with_new_fields(self):
+        """Test handling of RunnerResult with potentially new fields."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create RunnerResult with standard fields plus potential new ones
+        mock_result = RunnerResult(
+            answer="test answer",
+            step_history=[],
+            error=None,
+            id="test_id_123"
+        )
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should handle RunnerResult regardless of additional fields
+        self.assertEqual(eval_fn, trainer.eval_fn)
+        self.assertEqual(eval_kwargs["y"], "test answer")
+        self.assertEqual(eval_kwargs["y_gt"], "New runner works")
+
+    def test_parameter_handling_with_new_runner_output(self):
+        """Test Parameter creation with new Runner output types."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test with OutputParameter (which is now potentially returned by new Runner)
+        from adalflow.optim.parameter import OutputParameter
+
+        # Create a mock RunnerResult
+        mock_result = RunnerResult(answer="output parameter test", step_history=[])
+
+        # Create an OutputParameter wrapping the result (as new Runner might return)
+        mock_output_param = OutputParameter(
+            name="runner_output",
+            data=mock_result,
+            requires_opt=True
+        )
+
+        # Test prepare_loss can handle OutputParameter input
+        loss_fn, loss_kwargs = trainer.prepare_loss(self.test_sample, mock_output_param)
+
+        self.assertEqual(loss_fn, trainer.loss_fn)
+        self.assertIn("kwargs", loss_kwargs)
+        self.assertEqual(loss_kwargs["id"], "new_feature_test")
+
+        # Check eval_input is set correctly for OutputParameter
+        self.assertEqual(mock_output_param.eval_input, "output parameter test")
+
+    def test_runner_trainer_initialization_with_new_runner_kwargs(self):
+        """Test RunnerTrainer handles any new Runner constructor parameters gracefully."""
+        # Test with various configurations that might be used with new Runner
+        configs = [
+            {"training": True},
+            {"conversation_memory": None},
+            {"permission_manager": None},
+            {"ctx": {"test": "context"}},
+        ]
+
+        for config in configs:
+            with self.subTest(config=config):
+                # Mock runner should accept any additional parameters
+                mock_runner = Mock(spec=adal.Component)
+                for key, value in config.items():
+                    setattr(mock_runner, key, value)
+
+                trainer = RunnerTrainer(runner=mock_runner)
+
+                # Should initialize successfully regardless of runner's additional features
+                self.assertIsNotNone(trainer.task)
+                self.assertIsNotNone(trainer.eval_fn)
+                self.assertIsNotNone(trainer.loss_fn)
+
+    def test_step_history_handling_with_enhanced_runner(self):
+        """Test that step history handling works with potentially enhanced Runner."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Create mock step history with potentially new StepOutput format
+        mock_step = Mock()
+        mock_step.step = 1
+        mock_step.function = Mock()
+        mock_step.function.name = "test_function"
+        mock_step.observation = "test observation"
+
+        mock_result = RunnerResult(
+            answer="final answer",
+            step_history=[mock_step],
+            error=None
+        )
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should extract answer correctly regardless of step history format
+        self.assertEqual(eval_kwargs["y"], "final answer")
+
+    def test_error_handling_with_new_runner_exceptions(self):
+        """Test error handling with potentially new Runner exception types."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test with RunnerResult containing error information
+        mock_result = RunnerResult(
+            answer=None,
+            step_history=[],
+            error="New runner error type"
+        )
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, mock_result)
+
+        # Should handle error cases gracefully
+        self.assertEqual(eval_kwargs["y"], "")  # Empty string for None answer
+        self.assertEqual(eval_kwargs["y_gt"], "New runner works")
+
+    def test_context_preservation_with_new_runner(self):
+        """Test that context and metadata are preserved with new Runner features."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test prepare_task with additional context that new Runner might use
+        sample_with_metadata = Example(
+            id="context_test",
+            question="Test with context?",
+            answer="Context preserved"
+        )
+
+        task_fn, kwargs = trainer.prepare_task(sample_with_metadata)
+
+        # Should preserve all necessary information for new Runner
+        self.assertEqual(task_fn, self.mock_runner.__call__)
+        self.assertIn("prompt_kwargs", kwargs)
+        self.assertEqual(kwargs["prompt_kwargs"]["input_str"], "Test with context?")
+        self.assertEqual(kwargs["id"], "context_test")
+
+    def test_backward_compatibility_with_legacy_runner_features(self):
+        """Test that RunnerTrainer maintains backward compatibility."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test with "old style" runner result
+        old_style_result = RunnerResult(answer="legacy answer", step_history=[])
+
+        eval_fn, eval_kwargs = trainer.prepare_eval(self.test_sample, old_style_result)
+
+        # Should work exactly as before
+        eval_result = eval_fn(**eval_kwargs)
+        # This should fail since answers don't match, proving evaluation works
+        self.assertEqual(eval_result, 0.0)  # "legacy answer" != "New runner works"
+
+    def test_concurrent_runner_execution_support(self):
+        """Test that RunnerTrainer supports potential concurrent execution features."""
+        trainer = RunnerTrainer(runner=self.mock_runner)
+
+        # Test multiple samples in sequence (simulating batch processing)
+        samples = [
+            Example(id=f"concurrent_{i}", question=f"Question {i}?", answer=f"Answer {i}")
+            for i in range(3)
+        ]
+
+        results = []
+        for sample in samples:
+            mock_result = RunnerResult(answer=f"Answer {sample.id.split('_')[1]}", step_history=[])
+            eval_fn, eval_kwargs = trainer.prepare_eval(sample, mock_result)
+            result = eval_fn(**eval_kwargs)
+            results.append(result)
+
+        # All should evaluate correctly
+        self.assertEqual(results, [1.0, 1.0, 1.0])
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py b/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
index 370ec97e5..9419a1517 100644
--- a/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
+++ b/benchmarks/hotpot_qa/adal_exp/train_agent_rag.py
@@ -3,6 +3,10 @@
 import adalflow as adal
 from adalflow.eval.answer_match_acc import AnswerMatchAcc
 from adalflow.datasets.types import HotPotQAData
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
 
 from benchmarks.hotpot_qa.config import load_datasets
 from benchmarks.hotpot_qa.adal_exp.build_multi_hop_rag import AgenticRAG
diff --git a/notebooks/tutorials/adalflow_classification_optimization.ipynb b/notebooks/tutorials/adalflow_classification_optimization.ipynb
index e83980d14..733630545 100644
--- a/notebooks/tutorials/adalflow_classification_optimization.ipynb
+++ b/notebooks/tutorials/adalflow_classification_optimization.ipynb
@@ -904,11 +904,13 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "adalflow-project-Y74mvs4e-py3.12",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
-   "name": "python"
+   "name": "python",
"name": "python", + "version": "3.12.7" } }, "nbformat": 4, diff --git a/notebooks/tutorials/adalflow_rag_optimization.ipynb b/notebooks/tutorials/adalflow_rag_optimization.ipynb index c05d9ad47..8cca7fd7b 100644 --- a/notebooks/tutorials/adalflow_rag_optimization.ipynb +++ b/notebooks/tutorials/adalflow_rag_optimization.ipynb @@ -497,11 +497,13 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "adalflow-project-Y74mvs4e-py3.12", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.12.7" } }, "nbformat": 4, diff --git a/poetry.lock b/poetry.lock index 308f6d5e3..db6aa3d2b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -114,7 +114,7 @@ version = "2.6.1" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, @@ -126,7 +126,7 @@ version = "3.12.15" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "aiohttp-3.12.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b6fc902bff74d9b1879ad55f5404153e2b33a82e72a95c89cec5eb6cc9e92fbc"}, {file = "aiohttp-3.12.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:098e92835b8119b54c693f2f88a1dec690e20798ca5f5fe5f0520245253ee0af"}, @@ -250,7 +250,7 @@ version = "1.4.0" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, @@ -1324,7 +1324,7 @@ version = "4.0.0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.9.0" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d"}, {file = "datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1"}, @@ -1426,7 +1426,7 @@ version = "0.3.8" description = "serialize all of Python" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, @@ -1735,7 +1735,7 @@ version = "3.18.0" description = "A platform independent file lock." 
optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, @@ -1875,7 +1875,7 @@ version = "1.7.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cc4df77d638aa2ed703b878dd093725b72a824c3c546c076e8fdf276f78ee84a"}, {file = "frozenlist-1.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:716a9973a2cc963160394f701964fe25012600f3d311f60c790400b00e568b61"}, @@ -1989,7 +1989,7 @@ version = "2025.3.0" description = "File-system specification" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"}, {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"}, @@ -2291,7 +2291,7 @@ version = "0.21" description = "Simple Python interface for Graphviz" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42"}, {file = "graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78"}, @@ -2528,7 +2528,7 @@ version = "1.1.5" description = "Fast transfer of large files with the Hugging Face Hub." 
optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" files = [ {file = "hf_xet-1.1.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f52c2fa3635b8c37c7764d8796dfa72706cc4eded19d638331161e82b0792e23"}, @@ -2643,7 +2643,7 @@ version = "0.34.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "huggingface_hub-0.34.3-py3-none-any.whl", hash = "sha256:5444550099e2d86e68b2898b09e85878fbd788fc2957b506c6a79ce060e39492"}, {file = "huggingface_hub-0.34.3.tar.gz", hash = "sha256:d58130fd5aa7408480681475491c0abd7e835442082fbc3ef4d45b6c39f83853"}, @@ -3104,6 +3104,25 @@ files = [ {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, ] +[[package]] +name = "jsonpickle" +version = "4.1.1" +description = "jsonpickle encodes/decodes any Python object to/from JSON" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "jsonpickle-4.1.1-py3-none-any.whl", hash = "sha256:bb141da6057898aa2438ff268362b126826c812a1721e31cf08a6e142910dc91"}, + {file = "jsonpickle-4.1.1.tar.gz", hash = "sha256:f86e18f13e2b96c1c1eede0b7b90095bbb61d99fedc14813c44dc2f361dbbae1"}, +] + +[package.extras] +cov = ["pytest-cov"] +dev = ["black", "pyupgrade"] +docs = ["furo", "rst.linker (>=1.9)", "sphinx (>=3.5)"] +packaging = ["build", "setuptools (>=61.2)", "setuptools_scm[toml] (>=6.0)", "twine"] +testing = ["PyYAML", "atheris (>=2.3.0,<2.4.0) ; python_version < \"3.12\"", "bson", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "pytest (>=6.0,!=8.1.*)", "pytest-benchmark", "pytest-benchmark[histogram]", "pytest-checkdocs (>=1.2.3)", "pytest-enabler (>=1.0.1)", "pytest-ruff (>=0.2.1)", "scikit-learn", "scipy (>=1.9.3) ; python_version > \"3.10\"", "scipy ; python_version <= \"3.10\"", "simplejson", "sqlalchemy", "ujson"] + [[package]] name = "jsonpointer" version = "3.0.0" @@ -4214,7 +4233,7 @@ version = "6.6.3" description = "multidict implementation" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2be5b7b35271f7fff1397204ba6708365e3d773579fe2a30625e16c4b4ce817"}, {file = "multidict-6.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12f4581d2930840295c461764b9a65732ec01250b46c6b2c510d7ee68872b140"}, @@ -4334,7 +4353,7 @@ version = "0.70.16" description = "better multiprocessing and multithreading in Python" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, @@ -4465,7 +4484,7 @@ version = "3.5" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.11" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "networkx-3.5-py3-none-any.whl", hash = 
"sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec"}, {file = "networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037"}, @@ -5116,7 +5135,7 @@ version = "2.3.1" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, @@ -5479,7 +5498,7 @@ version = "0.3.2" description = "Accelerated property cache" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:22d9962a358aedbb7a2e36187ff273adeaab9743373a272976d2e348d08c7770"}, {file = "propcache-0.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d0fda578d1dc3f77b6b5a5dce3b9ad69a8250a891760a548df850a5e8da87f3"}, @@ -5678,7 +5697,7 @@ version = "20.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pyarrow-20.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c7dd06fd7d7b410ca5dc839cc9d485d2bc4ae5240851bcd45d85105cc90a47d7"}, {file = "pyarrow-20.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d5382de8dc34c943249b01c19110783d0d64b207167c728461add1ecc2db88e4"}, @@ -6079,7 +6098,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -6136,12 +6155,29 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] +[[package]] +name = "pyvis" +version = "0.3.2" +description = "A Python network graph visualization library" +optional = false +python-versions = ">3.6" +groups = ["main"] +files = [ + {file = "pyvis-0.3.2-py3-none-any.whl", hash = "sha256:5720c4ca8161dc5d9ab352015723abb7a8bb8fb443edeb07f7a322db34a97555"}, +] + +[package.dependencies] +ipython = ">=5.3.0" +jinja2 = ">=2.9.6" +jsonpickle = ">=1.4.1" +networkx = ">=1.11" + [[package]] name = "pywin32" version = "311" @@ -7049,7 +7085,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -7880,7 +7916,6 @@ files = [ 
{file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, {file = "typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, ] -markers = {main = "python_version == \"3.11\""} [[package]] name = "typing-inspect" @@ -7919,7 +7954,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -8220,7 +8255,7 @@ version = "3.5.0" description = "Python binding for xxHash" optional = false python-versions = ">=3.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"}, {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"}, @@ -8353,7 +8388,7 @@ version = "1.20.1" description = "Yet another URL library" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6032e6da6abd41e4acda34d75a816012717000fa6839f37124a47fcefc49bec4"}, {file = "yarl-1.20.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2c7b34d804b8cf9b214f05015c4fee2ebe7ed05cf581e7192c06555c71f4446a"}, @@ -8489,4 +8524,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.11, <4.0" -content-hash = "6fcfca26063930dfdb7d9c8aade6b90293abeb0ceb205861bc52a23f0b52188e" +content-hash = "b7d22133e9fafd9adb0b4c5b65bf36e392c8e975e4fcdee90eeb74288e8971b7" diff --git a/pyproject.toml b/pyproject.toml index f90e32269..fe9579a85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,9 @@ adalflow = { path = "adalflow", develop = true } black = { extras = ["jupyter"], version = "^24.10.0" } aioitertools = "^0.12.0" asyncstdlib = "^3.13.1" +datasets = "^4.0.0" +pyvis = "^0.3.2" +graphviz = "^0.21" [tool.poetry.group.dev.dependencies] diff --git a/tutorials/v1_agent_runner/runner_trainable_tutorial.py b/tutorials/v1_agent_runner/runner_trainable_tutorial.py new file mode 100644 index 000000000..d6660df68 --- /dev/null +++ b/tutorials/v1_agent_runner/runner_trainable_tutorial.py @@ -0,0 +1,198 @@ +""" +Tutorial: Using Runner.forward() Method to Get Trainable Results + +This tutorial demonstrates how to use the Runner's forward() method to get +trainable outputs that can be used for optimization. The key difference between +runner.call() and runner.forward() is that forward() returns trainable Parameter +objects that maintain gradient information for optimization. 
+""" + +import adalflow as adal +from adalflow.core.types import RunnerResult +from adalflow.optim.parameter import OutputParameter +from adalflow.utils import setup_env + +setup_env() + + +def create_simple_agent(): + """Create a simple agent for demonstration.""" + # Configure model client + model_config = { + "model_client": adal.AnthropicAPIClient(), + "model_kwargs": { + "model": "claude-3-5-sonnet-20241022", + "temperature": 0.0, + } + } + + # Create agent with simple tools + agent = adal.Agent( + name="demo_agent", + add_llm_as_fallback=True, + max_steps=3, + **model_config + ) + + return agent + + +def example_1_basic_forward_usage(): + """Example 1: Basic usage of runner.forward() method.""" + print("=== Example 1: Basic Forward Usage ===") + + agent = create_simple_agent() + runner = adal.Runner(agent=agent, training=True) # Important: set training=True + + # Use forward() instead of call() to get trainable results + prompt_kwargs = { + "input_str": "What is 2+2? Use the finish action to provide your final answer." + } + + # This returns an OutputParameter with gradient information + trainable_result = runner.forward(prompt_kwargs=prompt_kwargs) + + print(f"Result type: {type(trainable_result)}") + print(f"Is trainable (requires_opt): {trainable_result.requires_opt}") + print(f"Has gradient function: {trainable_result.grad_fn is not None}") + + # Access the actual RunnerResult from the parameter + runner_result: RunnerResult = trainable_result.data + print(f"Final answer: {runner_result.answer}") + print(f"Number of steps: {len(runner_result.step_history)}") + + return trainable_result + + +def example_2_extracting_final_output(): + """Example 2: How to extract the final output from trainable result.""" + print("\n=== Example 2: Extracting Final Output ===") + + agent = create_simple_agent() + runner = adal.Runner(agent=agent, training=True) + + prompt_kwargs = { + "input_str": "Calculate 15 * 3 and explain your reasoning. Use finish to provide your final answer." + } + + # Get trainable result + trainable_result = runner.forward(prompt_kwargs=prompt_kwargs) + + # Method 1: Access through .data attribute + final_answer = trainable_result.data.answer + print(f"Method 1 - Final answer via .data: {final_answer}") + + # Method 2: Access step history for detailed breakdown + step_history = trainable_result.data.step_history + print(f"Method 2 - Step history length: {len(step_history)}") + for i, step in enumerate(step_history): + print(f" Step {i}: {step.function.name if step.function else 'None'} -> {step.observation}") + + # Method 3: Check if there were any errors + if trainable_result.data.error: + print(f"Method 3 - Error occurred: {trainable_result.data.error}") + else: + print("Method 3 - No errors occurred") + + return trainable_result + + +def example_3_comparison_call_vs_forward(): + """Example 3: Comparing runner.call() vs runner.forward().""" + print("\n=== Example 3: Call vs Forward Comparison ===") + + agent = create_simple_agent() + runner = adal.Runner(agent=agent) + + prompt_kwargs = { + "input_str": "What is the capital of France? Use finish to provide your final answer." 
+
+    # Using call() - returns RunnerResult directly (not trainable)
+    runner.training = False  # Inference mode
+    call_result = runner(prompt_kwargs=prompt_kwargs)
+
+    print("Call Result:")
+    print(f"  Type: {type(call_result)}")
+    print(f"  Answer: {call_result.answer}")
+    print("  Is trainable: No (RunnerResult object)")
+
+    # Using forward() - returns a trainable OutputParameter
+    runner.training = True  # Training mode
+    forward_result = runner.forward(prompt_kwargs=prompt_kwargs)
+
+    print("\nForward Result:")
+    print(f"  Type: {type(forward_result)}")
+    print(f"  Answer: {forward_result.data.answer}")
+    print(f"  Is trainable: {forward_result.requires_opt}")
+    print(f"  Has predecessors: {len(forward_result.predecessors) > 0}")
+
+    return call_result, forward_result
+
+
+def example_4_gradient_tracking():
+    """Example 4: Understanding gradient tracking in trainable results."""
+    print("\n=== Example 4: Gradient Tracking ===")
+
+    agent = create_simple_agent()
+    runner = adal.Runner(agent=agent, training=True)
+
+    prompt_kwargs = {
+        "input_str": "Solve: If John has 5 apples and gives away 2, how many does he have left?"
+    }
+
+    # Get trainable result
+    trainable_result = runner.forward(prompt_kwargs=prompt_kwargs)
+
+    print("Gradient Information:")
+    print(f"  Has grad_fn: {trainable_result.grad_fn is not None}")
+    print(f"  Requires optimization: {trainable_result.requires_opt}")
+    print(f"  Parameter name: {trainable_result.name}")
+    print(f"  Number of predecessors: {len(trainable_result.predecessors)}")
+
+    # The grad_fn holds the backward context used during optimization
+    if trainable_result.grad_fn:
+        backward_ctx = trainable_result.grad_fn
+        print(f"  Backward context: {backward_ctx}")
+        print(f"  Backward function available: {backward_ctx.backward_fn is not None}")
+        print(f"  Template available: {backward_ctx.kwargs['template'] is not None}")
+        print(f"  Prompt string length: {len(backward_ctx.kwargs['prompt_str']) if backward_ctx.kwargs['prompt_str'] else 0}")
+
+    return trainable_result
+
+
+def main():
+    """Run all examples."""
+    print("Tutorial: Runner.forward() for Trainable Results")
+    print("=" * 50)
+
+    try:
+        # Run examples
+        example_1_basic_forward_usage()
+        example_2_extracting_final_output()
+        example_3_comparison_call_vs_forward()
+        example_4_gradient_tracking()
+
+        print("\n" + "=" * 50)
+        print("Tutorial completed successfully!")
+        print("\nKey takeaways:")
+        print("1. Use runner.forward() instead of runner.call() for trainable results")
+        print("2. Set training=True when creating the Runner for optimization")
+        print("3. forward() returns an OutputParameter with gradient information")
+        print("4. Trainable results can be used with AdalFlow's optimization system")
+
+    except Exception as e:
+        print(f"Error occurred during tutorial: {e}")
+        print("Make sure you have:")
+        print("1. Set up your API key (ANTHROPIC_API_KEY)")
+        print("2. Installed AdalFlow with all dependencies")
+        return False
+
+    return True
+
+
+if __name__ == "__main__":
+    # setup_env() already runs at import time; ensure ANTHROPIC_API_KEY is configured.
+    main()
\ No newline at end of file
diff --git a/use_cases/question_answering/gsm8k/train.py b/use_cases/question_answering/gsm8k/train.py
index 63fc2ef40..cf3b764d8 100644
--- a/use_cases/question_answering/gsm8k/train.py
+++ b/use_cases/question_answering/gsm8k/train.py
@@ -21,8 +21,9 @@ def __init__(
         backward_engine_model_config: Optional[Dict] = None,
         teacher_model_config: Optional[Dict] = None,
         text_optimizer_model_config: Optional[Dict] = None,
+        config: Optional[Dict] = None,
     ):
-        task = GSM8KTask(**gpt_3_model)
+        task = GSM8KTask(**config)
         eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
         loss_fn = adal.EvalFnToTextLoss(
             eval_fn=eval_fn,
@@ -66,16 +67,29 @@ def prepare_loss(
 def train():
     train_data, val_data, test_data = load_datasets()
-    task = GSM8KTask(**gpt_3_model)
+
+    # Create Anthropic client configuration instead of GPT-3
+    anthropic_config = {
+        "model_client": adal.AnthropicAPIClient(),
+        "model_kwargs": {
+            "model": "claude-3-5-sonnet-20241022",
+            # "max_tokens": 2000,
+            "temperature": 0.0,
+        }
+    }
+
+    task = GSM8KTask(**anthropic_config)
     adal_component = StandardTrain(
         task=task,
-        backward_engine_model_config=gpt_o3_mini_model,
-        text_optimizer_model_config=gpt_o3_mini_model,
+        backward_engine_model_config=anthropic_config,
+        text_optimizer_model_config=anthropic_config,
+        config=anthropic_config,
     )
     trainer = adal.Trainer(
         adaltask=adal_component,
         strategy="random",
-        max_steps=10,
+        # max_steps=10,
+        max_steps=1,
         text_optimizers_config_kwargs={"max_past_history": 5},
     )
     trainer.fit(
@@ -83,7 +97,8 @@ def train():
         val_dataset=val_data,
         test_dataset=test_data,
         debug=False,
-        resume_from_ckpt="/Users/liyin/.adalflow/ckpt/StandardTrain/random_max_steps_10_57bb8_run_1.json",
+        # resume_from_ckpt=None,
+        # resume_from_ckpt="/Users/jinhakim/.adalflow/ckpt/StandardTrain/random_max_steps_1_e35e9_run_1.json",
     )
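
As a closing note on the new training path: below is a minimal sketch (not part of the diff above) showing how the trainable OutputParameter from runner.forward() could feed the same AnswerMatchAcc/EvalFnToTextLoss pair that use_cases/question_answering/gsm8k/train.py uses. The Parameter constructor arguments and the loss_fn(kwargs=..., id=...) call shape follow what the RunnerTrainer tests assert; treat the exact signatures as assumptions rather than settled API.

import adalflow as adal
from adalflow.eval.answer_match_acc import AnswerMatchAcc
from adalflow.utils import setup_env

setup_env()  # expects ANTHROPIC_API_KEY in the environment

# Agent and Runner in training mode, matching the tutorial above.
agent = adal.Agent(
    name="demo_agent",
    model_client=adal.AnthropicAPIClient(),
    model_kwargs={"model": "claude-3-5-sonnet-20241022", "temperature": 0.0},
    add_llm_as_fallback=True,
    max_steps=3,
)
runner = adal.Runner(agent=agent, training=True)

# forward() returns an OutputParameter whose .data is a RunnerResult.
pred = runner.forward(prompt_kwargs={"input_str": "What is 2+2? Use finish to answer."})
pred.eval_input = pred.data.answer  # the value the eval/loss functions score

# Ground truth wrapped as a non-trainable Parameter (assumed constructor shape).
gt = adal.Parameter(name="y_gt", data="4", eval_input="4", requires_opt=False)

eval_fn = AnswerMatchAcc(type="exact_match").compute_single_item
loss_fn = adal.EvalFnToTextLoss(
    eval_fn=eval_fn,
    eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0",
)

# Same call shape the RunnerTrainer tests check: kwargs={"y": ..., "y_gt": ...} plus id.
loss = loss_fn(kwargs={"y": pred, "y_gt": gt}, id="demo_1")
loss.backward()  # propagate textual gradients back through the runner's graph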