Skip to content

Commit 951d5d9

Browse files
Merge pull request #55 from amazon-science/feature/prompt_tuner_fixes
Bug Fixes for Prompt Tuner
2 parents a1ae458 + a8b98c7 commit 951d5d9

File tree

6 files changed

+69
-27
lines changed

6 files changed

+69
-27
lines changed

src/fmcore/llm/mixins/llm_mixins.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Optional, Union
22

3+
from pydantic import Field
4+
35
from fmcore.llm.types.llm_types import LLMConfig, DistributedLLMConfig
46
from fmcore.types.mixins_types import Mixin
57
from fmcore.types.typed import MutableTyped
@@ -13,4 +15,4 @@ class LLMConfigMixin(MutableTyped, Mixin):
1315
llm_config (Optional[LLMConfig]): The LLM configuration object.
1416
"""
1517

16-
llm_config: Union[LLMConfig, DistributedLLMConfig]
18+
llm_config: Union[LLMConfig, DistributedLLMConfig] = Field(union_mode="left_to_right")

src/fmcore/llm/mixins/provider_mixins.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ class AWSAccountMixin(MutableTyped, Mixin):
1616
region (str): The AWS region where the account operates. Defaults to 'us-east-1'.
1717
"""
1818

19-
role_arn: str
20-
region: str = Field(default=AWSRegion.US_EAST_1.value)
19+
role_arn: Optional[str] = Field(default=None)
20+
region: Optional[str] = Field(default="us-east-1")
2121

2222

2323
class APIKeyServiceMixin(MutableTyped, Mixin):

src/fmcore/prompt_tuner/dspy/optimizer_wrapper/miprov2/miprov2_optimizer_types.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Optional
22

3+
from pydantic import Field
4+
35
from fmcore.prompt_tuner.types.enums.optimizer_enums import OptimizerMetricType, DSPyOptimizerType
46
from fmcore.prompt_tuner.types.mixins.optimizer_mixins import (
57
StudentConfigMixin,
@@ -22,8 +24,10 @@ class MIPROv2OptimizerParams(BaseOptimizerParams):
2224
"""
2325

2426
optimizer_metric: str = OptimizerMetricType.ACCURACY
25-
auto: Optional[str] = "light"
26-
num_candidates: int = 7
27+
num_candidates: Optional[int] = Field(default=7)
28+
max_errors: Optional[int] = Field(default=10)
29+
minibatch: Optional[bool] = Field(default=False)
30+
auto: Optional[str] = None
2731

2832

2933
class MIPROv2OptimizerConfig(

src/fmcore/prompt_tuner/dspy/utils/dspy_utils.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,13 +168,8 @@ def evaluate_func(example: dspy.Example, prediction: dspy.Prediction, trace=None
168168
"output": prediction.toDict(),
169169
}
170170

171-
try:
172-
# We are using this hack because dspy doesn't support async
173-
decision = AsyncUtils.execute(evaluator.aevaluate(data=row))
174-
except Exception as e:
175-
# Defaulting to false incase of failures
176-
Log.info(f"Error {e} during evaluating {row}")
177-
decision = False
171+
# We are using this hack because dspy doesn't support async
172+
decision = AsyncUtils.execute(evaluator.aevaluate(data=row))
178173

179174
return decision
180175

src/fmcore/prompt_tuner/evaluator/llm_as_a_judge_boolean/llm_as_a_judge_boolean_evaluator.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from fmcore.mapper.llm_response_json_mapper import LLMResponseJsonMapper
1212
from fmcore.mapper.criteria_checker_mapper import CriteriaCheckerMapper
1313
from fmcore.mapper.llm_inference_mapper import LLMInferenceMapper
14+
from fmcore.utils.logging_utils import Log
1415

1516

1617
class LLMAsJudgeBooleanEvaluator(BaseEvaluator[Dict, bool]):
@@ -72,19 +73,38 @@ def _get_instance(cls, *, evaluator_config: EvaluatorConfig) -> "LLMAsJudgeBoole
7273

7374
def evaluate(self, data: Dict) -> bool:
7475
"""
75-
Processes the input data by using the llm_as_a_judge_boolean_mapper to evaluate the context.
76+
Processes the input data using the llm_as_a_judge_boolean_mapper to evaluate the context.
7677
7778
Args:
7879
data (BooleanLLMJudgeInput): Input data containing context for evaluation.
7980
8081
Returns:
81-
BooleanLLMJudgeOutput: Evaluation result as a boolean decision.
82+
bool: Evaluation result as a boolean decision.
8283
"""
83-
# Format the context into messages using the template
84-
formatted_message: BaseMessage = self.text_prompt_mapper.map(data)
85-
llm_response: BaseMessage = self.llm_inference_mapper.map([formatted_message])
86-
json_response: Dict = self.json_mapper.map(llm_response.content)
87-
decision: bool = self.criteria_checker.map(json_response)
84+
formatted_message = llm_response = json_response = decision = None
85+
86+
try:
87+
formatted_message = self.text_prompt_mapper.map(data)
88+
llm_response = self.llm_inference_mapper.map([formatted_message])
89+
json_response = self.json_mapper.map(llm_response.content)
90+
decision = self.criteria_checker.map(json_response)
91+
92+
if not isinstance(decision, bool):
93+
raise ValueError("Decision is not a boolean value")
94+
95+
except Exception as e:
96+
Log.error(
97+
"[SYNC EVALUATION ERROR]\t\t ->"
98+
f"[INPUT DATA]: {data}\t\t ->"
99+
f"[PROMPT]: {self.evaluator_config.evaluator_params.prompt}\t\t ->"
100+
f"[FORMATTED MESSAGE]: {formatted_message}\t\t ->"
101+
f"[LLM RESPONSE]: {llm_response}\t\t ->"
102+
f"[JSON RESPONSE]: {json_response}\t\t ->"
103+
f"[DECISION]: {decision}\t\t ->"
104+
f"[ERROR]: {e}"
105+
)
106+
raise
107+
88108
return decision
89109

90110
async def aevaluate(self, data: Dict) -> bool:
@@ -95,11 +115,30 @@ async def aevaluate(self, data: Dict) -> bool:
95115
data (BooleanLLMJudgeInput): Input data containing context for evaluation.
96116
97117
Returns:
98-
BooleanLLMJudgeOutput: Evaluation result as a boolean decision.
118+
bool: Evaluation result as a boolean decision.
99119
"""
100-
# Format the context into messages using the template
101-
formatted_message: BaseMessage = await self.text_prompt_mapper.amap(data)
102-
llm_response: BaseMessage = await self.llm_inference_mapper.amap([formatted_message])
103-
json_response: Dict = await self.json_mapper.amap(llm_response.content)
104-
decision: bool = await self.criteria_checker.amap(json_response)
120+
formatted_message = llm_response = json_response = decision = None
121+
122+
try:
123+
formatted_message = await self.text_prompt_mapper.amap(data)
124+
llm_response = await self.llm_inference_mapper.amap([formatted_message])
125+
json_response = await self.json_mapper.amap(llm_response.content)
126+
decision = await self.criteria_checker.amap(json_response)
127+
128+
if not isinstance(decision, bool):
129+
raise ValueError("Decision is not a boolean value")
130+
131+
except Exception as e:
132+
Log.error(
133+
"[ASYNC EVALUATION ERROR]\t\t->"
134+
f"[INPUT DATA]: {data}\t\t ->"
135+
f"[PROMPT]: {self.evaluator_config.evaluator_params.prompt}\t\t ->"
136+
f"[FORMATTED MESSAGE]: {formatted_message}\t\t ->"
137+
f"[LLM RESPONSE]: {llm_response}\t\t ->"
138+
f"[JSON RESPONSE]: {json_response}\t\t ->"
139+
f"[DECISION]: {decision}\t\t ->"
140+
f"[ERROR]: {e}"
141+
)
142+
raise
143+
105144
return decision

src/fmcore/prompt_tuner/types/mixins/optimizer_mixins.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Optional, Union
22

3+
from pydantic import Field
4+
35
from fmcore.llm.types.llm_types import LLMConfig, DistributedLLMConfig
46
from fmcore.prompt_tuner.evaluator.types.evaluator_types import EvaluatorConfig
57
from fmcore.types.mixins_types import Mixin
@@ -14,7 +16,7 @@ class StudentConfigMixin(MutableTyped, Mixin):
1416
student_config (Optional[LLMConfig]): The LLM configuration object for student model
1517
"""
1618

17-
student_config: Union[LLMConfig, DistributedLLMConfig]
19+
student_config: Union[LLMConfig, DistributedLLMConfig] = Field(union_mode="left_to_right")
1820

1921

2022
class TeacherConfigMixin(MutableTyped, Mixin):
@@ -25,7 +27,7 @@ class TeacherConfigMixin(MutableTyped, Mixin):
2527
teacher_config (Optional[LLMConfig]): The LLM configuration object for teacher model
2628
"""
2729

28-
teacher_config: Union[LLMConfig, DistributedLLMConfig]
30+
teacher_config: Union[LLMConfig, DistributedLLMConfig] = Field(union_mode="left_to_right")
2931

3032

3133
class EvaluatorConfigMixin(MutableTyped, Mixin):

0 commit comments

Comments (0)