|
12 | 12 | # See the License for the specific language governing permissions and |
13 | 13 | # limitations under the License. |
14 | 14 |
|
| 15 | +from __future__ import annotations |
| 16 | + |
15 | 17 | import importlib.util |
16 | 18 | import json |
17 | 19 | import logging |
|
22 | 24 | from typing import Optional |
23 | 25 | import uuid |
24 | 26 |
|
25 | | -from pydantic import BaseModel |
26 | | -from pydantic import Field |
27 | | - |
28 | 27 | from ..agents import Agent |
29 | 28 | from ..artifacts.base_artifact_service import BaseArtifactService |
30 | 29 | from ..evaluation.eval_case import EvalCase |
31 | | -from ..evaluation.eval_case import Invocation |
| 30 | +from ..evaluation.eval_metrics import EvalMetric |
| 31 | +from ..evaluation.eval_metrics import EvalMetricResult |
| 32 | +from ..evaluation.eval_metrics import EvalMetricResultPerInvocation |
| 33 | +from ..evaluation.eval_result import EvalCaseResult |
32 | 34 | from ..evaluation.evaluator import EvalStatus |
33 | 35 | from ..evaluation.evaluator import Evaluator |
34 | 36 | from ..sessions.base_session_service import BaseSessionService |
35 | | -from ..sessions.session import Session |
36 | | -from .utils import common |
37 | 37 |
|
38 | 38 | logger = logging.getLogger("google_adk." + __name__) |
39 | 39 |
|
40 | 40 |
|
41 | | -class EvalMetric(common.BaseModel): |
42 | | - """A metric used to evaluate a particular aspect of an eval case.""" |
43 | | - |
44 | | - metric_name: str |
45 | | - """The name of the metric.""" |
46 | | - |
47 | | - threshold: float |
48 | | - """A threshold value. Each metric decides how to interpret this threshold.""" |
49 | | - |
50 | | - |
51 | | -class EvalMetricResult(EvalMetric): |
52 | | - """The actual computed score/value of a particular EvalMetric.""" |
53 | | - |
54 | | - score: Optional[float] = None |
55 | | - eval_status: EvalStatus |
56 | | - |
57 | | - |
58 | | -class EvalMetricResultPerInvocation(common.BaseModel): |
59 | | - """Eval metric results per invocation.""" |
60 | | - |
61 | | - actual_invocation: Invocation |
62 | | - """The actual invocation, usually obtained by inferencing the agent.""" |
63 | | - |
64 | | - expected_invocation: Invocation |
65 | | - """The expected invocation, usually the reference or golden invocation.""" |
66 | | - |
67 | | - eval_metric_results: list[EvalMetricResult] = [] |
68 | | - """Eval resutls for each applicable metric.""" |
69 | | - |
70 | | - |
71 | | -class EvalCaseResult(common.BaseModel): |
72 | | - """Case-level evaluation results.""" |
73 | | - |
74 | | - eval_set_file: str = Field( |
75 | | - deprecated=True, |
76 | | - description="This field is deprecated, use eval_set_id instead.", |
77 | | - ) |
78 | | - eval_set_id: str = "" |
79 | | - """The eval set id.""" |
80 | | - |
81 | | - eval_id: str = "" |
82 | | - """The eval case id.""" |
83 | | - |
84 | | - final_eval_status: EvalStatus |
85 | | - """Final eval status for this eval case.""" |
86 | | - |
87 | | - eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field( |
88 | | - deprecated=True, |
89 | | - description=( |
90 | | - "This field is deprecated, use overall_eval_metric_results instead." |
91 | | - ), |
92 | | - ) |
93 | | - |
94 | | - overall_eval_metric_results: list[EvalMetricResult] |
95 | | - """Overall result for each metric for the entire eval case.""" |
96 | | - |
97 | | - eval_metric_result_per_invocation: list[EvalMetricResultPerInvocation] |
98 | | - """Result for each metric on a per invocation basis.""" |
99 | | - |
100 | | - session_id: str |
101 | | - """Session id of the session generated as result of inferencing/scraping stage of the eval.""" |
102 | | - |
103 | | - session_details: Optional[Session] = None |
104 | | - """Session generated as result of inferencing/scraping stage of the eval.""" |
105 | | - |
106 | | - user_id: Optional[str] = None |
107 | | - """User id used during inferencing/scraping stage of the eval.""" |
108 | | - |
109 | | - |
110 | | -class EvalSetResult(common.BaseModel): |
111 | | - eval_set_result_id: str |
112 | | - eval_set_result_name: str |
113 | | - eval_set_id: str |
114 | | - eval_case_results: list[EvalCaseResult] = Field(default_factory=list) |
115 | | - creation_timestamp: float = 0.0 |
116 | | - |
117 | | - |
118 | 41 | MISSING_EVAL_DEPENDENCIES_MESSAGE = ( |
119 | 42 | "Eval module is not installed, please install via `pip install" |
120 | 43 | " google-adk[eval]`." |
@@ -228,8 +151,6 @@ async def run_evals( |
228 | 151 | """ |
229 | 152 | try: |
230 | 153 | from ..evaluation.agent_evaluator import EvaluationGenerator |
231 | | - from ..evaluation.response_evaluator import ResponseEvaluator |
232 | | - from ..evaluation.trajectory_evaluator import TrajectoryEvaluator |
233 | 154 | except ModuleNotFoundError as e: |
234 | 155 | raise ModuleNotFoundError(MISSING_EVAL_DEPENDENCIES_MESSAGE) from e |
235 | 156 |
|
|