lastmile-ai · saqadri · Aug 9, 2025 · coderabbitai · Aug 9, 2025 · coderabbitai
diff --git a/src/mcp_agent/workflows/deep_orchestrator/config.py b/src/mcp_agent/workflows/deep_orchestrator/config.py
@@ -31,6 +31,29 @@ class ExecutionConfig(BaseModel):
     enable_filesystem: bool = True
     """Enable filesystem workspace for artifacts"""
 
+    # Efficiency and robustness controls
+    max_plan_verification_attempts: int = 4
+    """Maximum attempts to repair/verify a plan before proceeding"""
+
+    # Knowledge extraction strategy
+    knowledge_extraction_mode: str = "batch"
+    """Either 'per_task' or 'batch' (default) to extract knowledge after a step"""
+
+    knowledge_batch_max_concurrent: int = 3
+    """Max concurrent knowledge extraction tasks when in batch mode"""
+
+    # Token/cost optimization
+    lean_agent_design: bool = False
+    """If true, skip designer LLM call and create minimal agents for tasks"""
+
+    # Adaptive effort scaling based on objective complexity
+    dynamic_effort_scaling: bool = False
+    """If true, adjust execution/context budgets based on objective complexity"""
+
+    # Artifact persistence
+    save_task_outputs_to_artifacts: bool = True
+    """If true, persist each successful task's output into the workspace artifacts"""
+
 
 class ContextConfig(BaseModel):
     """Configuration for context management."""

diff --git a/src/mcp_agent/workflows/deep_orchestrator/knowledge.py b/src/mcp_agent/workflows/deep_orchestrator/knowledge.py
@@ -103,13 +103,24 @@ async def extract_knowledge(
                 else:
                     confidence = 0.8
 
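+                # Attach provenance/citation when the calling LLM exposes a provenance accessor.
+                # NOTE(review): the accessor's name suggests it clears on read; if this runs once per item, only the first item may receive a citation — confirm.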
+                citation = None
+                try:
+                    if hasattr(llm, "get_and_clear_tool_provenance"):
+                        prov = llm.get_and_clear_tool_provenance()
+                        if prov:
+                            citation = {"tools": prov[-3:]}  # last few tool calls
+                except Exception:
+                    citation = None
+
                 knowledge_items.append(
                     KnowledgeItem(
                         key=item.get("key", "Unknown"),
                         value=item.get("value", ""),
                         source=task_result.task_name,
                         confidence=confidence,
                         category=item.get("category", "general"),
+                        citation=citation,
                     )
-                        citation=citation,
-                    )
+                        citation=citation_info,
+                    )
-                        citation=citation,
-                    )
+                        citation=citation_info,
+                    )
                 )
 

diff --git a/src/mcp_agent/workflows/deep_orchestrator/models.py b/src/mcp_agent/workflows/deep_orchestrator/models.py
@@ -47,6 +47,8 @@ class KnowledgeItem:
     timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
     confidence: float = 1.0
     category: str = "general"
+    # Added citation/provenance
+    citation: Dict[str, Any] | None = None
 
-    # Added citation/provenance
-    citation: Dict[str, Any] | None = None
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary representation."""
+         return {
+             "key": self.key,
+             "value": self.value,
+             "source": self.source,
+             "timestamp": self.timestamp.isoformat(),
+             "confidence": self.confidence,
+             "category": self.category,
+            "citation": self.citation,
+         }
-    # Added citation/provenance
-    citation: Dict[str, Any] | None = None
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary representation."""
+         return {
+             "key": self.key,
+             "value": self.value,
+             "source": self.source,
+             "timestamp": self.timestamp.isoformat(),
+             "confidence": self.confidence,
+             "category": self.category,
+            "citation": self.citation,
+         }
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary representation."""
@@ -72,6 +74,8 @@ class TaskResult:
     knowledge_extracted: List[KnowledgeItem] = field(default_factory=list)
     duration_seconds: float = 0.0
     retry_count: int = 0
+    # Optional provenance/citation information for outputs
+    citation: Dict[str, Any] | None = None
 
     @property
     def success(self) -> bool:

diff --git a/src/mcp_agent/workflows/deep_orchestrator/orchestrator.py b/src/mcp_agent/workflows/deep_orchestrator/orchestrator.py
@@ -201,6 +201,8 @@ def _initialize_execution_components(self, objective: str):
             context=self.context,
             max_task_retries=self.config.execution.max_task_retries,
             enable_parallel=self.config.execution.enable_parallel,
+            knowledge_extraction_mode=self.config.execution.knowledge_extraction_mode,
+            lean_agent_design=self.config.execution.lean_agent_design,
         )
 
         # Set budget update callback
@@ -290,6 +292,47 @@ async def _execute_workflow(
         self.iteration = 0
         self.replan_count = 0
 
+        # Optional dynamic effort scaling inspired by Anthropic research heuristics
+        if getattr(self.config.execution, "dynamic_effort_scaling", False):
+            # Cheap LLM pass to assess complexity and suggest scaling
+            assessor = Agent(
+                name="EffortAssessor",
+                instruction=(
+                    "Assess objective complexity and recommend iteration/replan/context budgets."
+                ),
+                context=self.context,
+            )
+            llm = self.llm_factory(assessor)
+            try:
+                rec = await llm.generate_structured(  # type: ignore[arg-type]
+                    message=(
+                        f"<assess>Objective: {self.objective}\n"
+                        "Return JSON with keys: max_iterations, max_replans, task_context_budget.</assess>"
+                    ),
+                    response_model=dict,  # Loose schema to keep it cheap
+                    request_params=RequestParams(max_iterations=1, temperature=0.1),
+                )
+                mi = int(
+                    rec.get("max_iterations", self.config.execution.max_iterations)
+                )
+                mr = int(rec.get("max_replans", self.config.execution.max_replans))
+                tcb = int(
+                    rec.get(
+                        "task_context_budget", self.config.context.task_context_budget
+                    )
+                )
+                self.config.execution.max_iterations = max(
+                    self.config.execution.max_iterations, mi
+                )
+                self.config.execution.max_replans = max(
+                    self.config.execution.max_replans, mr
+                )
+                self.config.context.task_context_budget = max(
+                    self.config.context.task_context_budget, tcb
+                )
+            except Exception:
+                pass
+
-        # Optional dynamic effort scaling inspired by Anthropic research heuristics
-        if getattr(self.config.execution, "dynamic_effort_scaling", False):
-            # Cheap LLM pass to assess complexity and suggest scaling
-            assessor = Agent(
-                name="EffortAssessor",
-                instruction=(
-                    "Assess objective complexity and recommend iteration/replan/context budgets."
-                ),
-                context=self.context,
-            )
-            llm = self.llm_factory(assessor)
-            try:
-                rec = await llm.generate_structured(  # type: ignore[arg-type]
-                    message=(
-                        f"<assess>Objective: {self.objective}\n"
-                        "Return JSON with keys: max_iterations, max_replans, task_context_budget.</assess>"
-                    ),
-                    response_model=dict,  # Loose schema to keep it cheap
-                    request_params=RequestParams(max_iterations=1, temperature=0.1),
-                )
-                mi = int(
-                    rec.get("max_iterations", self.config.execution.max_iterations)
-                )
-                mr = int(rec.get("max_replans", self.config.execution.max_replans))
-                tcb = int(
-                    rec.get(
-                        "task_context_budget", self.config.context.task_context_budget
-                    )
-                )
-                self.config.execution.max_iterations = max(
-                    self.config.execution.max_iterations, mi
-                )
-                self.config.execution.max_replans = max(
-                    self.config.execution.max_replans, mr
-                )
-                self.config.context.task_context_budget = max(
-                    self.config.context.task_context_budget, tcb
-                )
-            except Exception:
-                pass
+        # Optional dynamic effort scaling inspired by Anthropic research heuristics
+        if getattr(self.config.execution, "dynamic_effort_scaling", False):
+            # Cheap LLM pass to assess complexity and suggest scaling
+            assessor = Agent(
+                name="EffortAssessor",
+                instruction=(
+                    "Assess objective complexity and recommend iteration/replan/context budgets."
+                ),
+                context=self.context,
+            )
+            llm = self.llm_factory(assessor)
+            try:
+                rp = RequestParams(max_iterations=1, temperature=0.1)
+                # Prefer cheap/fast model if available
+                try:
+                    setattr(
+                        rp,
+                        "modelPreferences",
+                        getattr(self.context, "assessor_model_preferences", None),
+                    )
+                except Exception:
+                    pass
+
+                raw = await llm.generate_str(
+                    message=(
+                        f"<assess>Objective: {self.objective}\n"
+                        "Return JSON with keys: max_iterations, max_replans, task_context_budget.</assess>"
+                    ),
+                    request_params=rp,
+                )
+                import json
+                rec = json.loads(raw) if raw else {}
+
+                mi = int(
+                    rec.get("max_iterations", self.config.execution.max_iterations)
+                )
+                mr = int(rec.get("max_replans", self.config.execution.max_replans))
+                tcb = int(
+                    rec.get(
+                        "task_context_budget",
+                        self.config.context.task_context_budget,
+                    )
+                )
+
+                # Clamp updates to prevent extreme values (tunable caps)
+                cap_iter = getattr(self.config.execution, "max_iterations_cap", 50)
+                cap_replans = getattr(self.config.execution, "max_replans_cap", 10)
+                cap_ctx = getattr(self.config.context, "task_context_budget_cap", 20000)
+
+                self.config.execution.max_iterations = min(
+                    max(self.config.execution.max_iterations, mi), cap_iter
+                )
+                self.config.execution.max_replans = min(
+                    max(self.config.execution.max_replans, mr), cap_replans
+                )
+                self.config.context.task_context_budget = min(
+                    max(self.config.context.task_context_budget, tcb), cap_ctx
+                )
+            except Exception:
+                pass
-        # Optional dynamic effort scaling inspired by Anthropic research heuristics
-        if getattr(self.config.execution, "dynamic_effort_scaling", False):
-            # Cheap LLM pass to assess complexity and suggest scaling
-            assessor = Agent(
-                name="EffortAssessor",
-                instruction=(
-                    "Assess objective complexity and recommend iteration/replan/context budgets."
-                ),
-                context=self.context,
-            )
-            llm = self.llm_factory(assessor)
-            try:
-                rec = await llm.generate_structured(  # type: ignore[arg-type]
-                    message=(
-                        f"<assess>Objective: {self.objective}\n"
-                        "Return JSON with keys: max_iterations, max_replans, task_context_budget.</assess>"
-                    ),
-                    response_model=dict,  # Loose schema to keep it cheap
-                    request_params=RequestParams(max_iterations=1, temperature=0.1),
-                )
-                mi = int(
-                    rec.get("max_iterations", self.config.execution.max_iterations)
-                )
-                mr = int(rec.get("max_replans", self.config.execution.max_replans))
-                tcb = int(
-                    rec.get(
-                        "task_context_budget", self.config.context.task_context_budget
-                    )
-                )
-                self.config.execution.max_iterations = max(
-                    self.config.execution.max_iterations, mi
-                )
-                self.config.execution.max_replans = max(
-                    self.config.execution.max_replans, mr
-                )
-                self.config.context.task_context_budget = max(
-                    self.config.context.task_context_budget, tcb
-                )
-            except Exception:
-                pass
+        # Optional dynamic effort scaling inspired by Anthropic research heuristics
+        if getattr(self.config.execution, "dynamic_effort_scaling", False):
+            # Cheap LLM pass to assess complexity and suggest scaling
+            assessor = Agent(
+                name="EffortAssessor",
+                instruction=(
+                    "Assess objective complexity and recommend iteration/replan/context budgets."
+                ),
+                context=self.context,
+            )
+            llm = self.llm_factory(assessor)
+            try:
+                rp = RequestParams(max_iterations=1, temperature=0.1)
+                # Prefer cheap/fast model if available
+                try:
+                    setattr(
+                        rp,
+                        "modelPreferences",
+                        getattr(self.context, "assessor_model_preferences", None),
+                    )
+                except Exception:
+                    pass
+
+                raw = await llm.generate_str(
+                    message=(
+                        f"<assess>Objective: {self.objective}\n"
+                        "Return JSON with keys: max_iterations, max_replans, task_context_budget.</assess>"
+                    ),
+                    request_params=rp,
+                )
+                import json
+                rec = json.loads(raw) if raw else {}
+
+                mi = int(
+                    rec.get("max_iterations", self.config.execution.max_iterations)
+                )
+                mr = int(rec.get("max_replans", self.config.execution.max_replans))
+                tcb = int(
+                    rec.get(
+                        "task_context_budget",
+                        self.config.context.task_context_budget,
+                    )
+                )
+
+                # Clamp updates to prevent extreme values (tunable caps)
+                cap_iter = getattr(self.config.execution, "max_iterations_cap", 50)
+                cap_replans = getattr(self.config.execution, "max_replans_cap", 10)
+                cap_ctx = getattr(self.config.context, "task_context_budget_cap", 20000)
+
+                self.config.execution.max_iterations = min(
+                    max(self.config.execution.max_iterations, mi), cap_iter
+                )
+                self.config.execution.max_replans = min(
+                    max(self.config.execution.max_replans, mr), cap_replans
+                )
+                self.config.context.task_context_budget = min(
+                    max(self.config.context.task_context_budget, tcb), cap_ctx
+                )
+            except Exception:
+                pass
         # Phase 1: Initial Planning
         span.add_event("phase_1_initial_planning")
         logger.info("Phase 1: Creating initial plan")
@@ -390,6 +433,29 @@ async def _execute_workflow(
                 next_step, request_params, self.executor
             )
 
+            # If configured, extract knowledge in batch post-step to reduce token churn
+            if (
+                self.config.execution.knowledge_extraction_mode == "batch"
+                and self.memory.task_results
+            ):
+                # Select stored results whose task_name matches a task in this step
+                step_task_names = {t.name for t in next_step.tasks}
+                step_results = [
+                    r
+                    for r in self.memory.task_results
+                    if r.task_name in step_task_names
+                ]
+                try:
+                    extracted = await self.knowledge_extractor.extract_batch(
+                        step_results,
+                        self.objective,
+                        max_concurrent=self.config.execution.knowledge_batch_max_concurrent,
+                    )
+                    for item in extracted:
+                        self.memory.add_knowledge(item)
+                except Exception as batch_err:
+                    logger.warning(f"Batch knowledge extraction failed: {batch_err}")
+
             # Complete the step
             self.queue.complete_step(next_step)
 
@@ -442,9 +508,19 @@ async def _create_full_plan(self) -> Plan:
         )
 
         llm = self.llm_factory(planner)
+        # Apply planner-specific model preferences from the context, if any are set
+        try:
+            rp = RequestParams(max_iterations=2)
+            rp.modelPreferences = getattr(
+                self.context, "planner_model_preferences", None
+            )
+        except Exception:
+            rp = RequestParams(max_iterations=2)
 
         # Try to create a valid plan with retries
-        max_verification_attempts = 10
+        max_verification_attempts = max(
+            1, getattr(self.config.execution, "max_plan_verification_attempts", 4)
+        )
         previous_plan: Plan = None
         previous_errors = None
 
@@ -487,7 +563,9 @@ async def _create_full_plan(self) -> Plan:
             # Get structured plan
             prompt = get_full_plan_prompt(context)
             plan: Plan = await retry_with_backoff(
-                lambda: llm.generate_structured(message=prompt, response_model=Plan),
+                lambda: llm.generate_structured(
+                    message=prompt, response_model=Plan, request_params=rp
+                ),
                 max_attempts=2,
             )
 
@@ -553,6 +631,14 @@ async def _verify_completion(self) -> tuple[bool, float]:
         )
 
         llm = self.llm_factory(verifier)
+        # Apply verifier-specific model preferences from the context, if any are set
+        rp = RequestParams(max_iterations=1)
+        try:
+            rp.modelPreferences = getattr(
+                self.context, "verifier_model_preferences", None
+            )
+        except Exception:
+            pass
 
         # Build verification context
         context = get_verification_context(
@@ -647,9 +733,14 @@ async def _create_final_synthesis(self) -> List[MessageT]:
         async with synthesizer:
             llm = await synthesizer.attach_llm(self.llm_factory)
 
-            result = await llm.generate(
-                message=prompt, request_params=RequestParams(max_iterations=5)
-            )
+            rp = RequestParams(max_iterations=5)
+            try:
+                rp.modelPreferences = getattr(
+                    self.context, "synthesizer_model_preferences", None
+                )
+            except Exception:
+                pass
+            result = await llm.generate(message=prompt, request_params=rp)
 
             logger.info("Final synthesis completed")
             return result

diff --git a/src/mcp_agent/workflows/deep_orchestrator/prompts.py b/src/mcp_agent/workflows/deep_orchestrator/prompts.py
@@ -67,6 +67,7 @@
   <rule>requires_context_from can ONLY reference tasks from PREVIOUS steps, not the current step</rule>
   <rule>If a task needs output from another task in the same step, move it to a subsequent step</rule>
   <rule>Only set context_window_budget if task needs more than default (10000 tokens)</rule>
+  <rule>Scale effort to query complexity: simple fact-finding = 1 step, few tasks; comparisons = 2-4 parallel tasks; broad surveys = multiple steps with tight division of labor</rule>
 </task_design_rules>
 
 <important_notes>
@@ -75,6 +76,8 @@
   <note>Consider resource constraints and prefer efficient approaches</note>
   <note>Think step by step about the best way to achieve the objective</note>
   <note>Tasks within a step run in parallel, steps run sequentially</note>
+  <note>Prefer asynchronous, loosely-coupled sub-tasks that can progress independently and be synthesized later</note>
+  <note>Favor authoritative sources/tools over SEO spam; encode source-quality selection when planning tasks</note>
 </important_notes>
 
 <example_task_structure>