diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 2c6442818d..d72b48a2fc 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -3749,7 +3749,7 @@ def test_eval_case_to_agent_data_events_only(self): assert agent_data.agent_config is None assert agent_data.events.event[0].parts[0].text == "intermediate event" - def test_eval_case_to_agent_data_empty_events(self): + def test_eval_case_to_agent_data_empty_event_content(self): intermediate_events = [ vertexai_genai_types.Event( event_id="event1", @@ -3774,8 +3774,86 @@ def test_eval_case_to_agent_data_empty_events(self): ) assert agent_data.agent_config is None - assert agent_data.events is None - assert not agent_data.events_text + assert not agent_data.events.event + + def test_eval_case_to_agent_data_empty_intermediate_events_list(self): + agent_info = vertexai_genai_types.evals.AgentInfo( + name="agent1", + instruction="instruction1", + tool_declarations=[], + ) + + eval_case = vertexai_genai_types.EvalCase( + prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]), + responses=[ + vertexai_genai_types.ResponseCandidate( + response=genai_types.Content(parts=[genai_types.Part(text="Hi")]) + ) + ], + agent_info=agent_info, + ) + + agent_data = ( + _evals_metric_handlers.PredefinedMetricHandler._eval_case_to_agent_data( + eval_case + ) + ) + + assert not agent_data.events.event + + def test_eval_case_to_agent_data_agent_info_empty_tools(self): + agent_info = vertexai_genai_types.evals.AgentInfo( + name="agent1", + instruction="instruction1", + tool_declarations=[], + ) + eval_case = vertexai_genai_types.EvalCase( + prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]), + responses=[ + vertexai_genai_types.ResponseCandidate( + response=genai_types.Content(parts=[genai_types.Part(text="Hi")]) + ) + ], + agent_info=agent_info, + intermediate_events=None, + ) + + agent_data = ( + _evals_metric_handlers.PredefinedMetricHandler._eval_case_to_agent_data( + eval_case + ) + ) + + assert agent_data.agent_config.developer_instruction.text == "instruction1" + assert not agent_data.agent_config.tools.tool + + def test_eval_case_to_agent_data_agent_info_empty(self): + intermediate_events = [ + vertexai_genai_types.Event( + event_id="event1", + content=genai_types.Content( + parts=[genai_types.Part(text="intermediate event")] + ), + ) + ] + eval_case = vertexai_genai_types.EvalCase( + prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]), + responses=[ + vertexai_genai_types.ResponseCandidate( + response=genai_types.Content(parts=[genai_types.Part(text="Hi")]) + ) + ], + agent_info=None, + intermediate_events=intermediate_events, + ) + + agent_data = ( + _evals_metric_handlers.PredefinedMetricHandler._eval_case_to_agent_data( + eval_case + ) + ) + + assert agent_data.agent_config is None @pytest.mark.usefixtures("google_auth_mock") diff --git a/vertexai/_genai/_evals_data_converters.py b/vertexai/_genai/_evals_data_converters.py index 340bf72965..337abaaae8 100644 --- a/vertexai/_genai/_evals_data_converters.py +++ b/vertexai/_genai/_evals_data_converters.py @@ -366,10 +366,6 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset: intermediate_events: Optional[list[types.Event]] = None if intermediate_events_data: - logger.warning( - "intermediate_events attribute is experimental and may change in " - "future versions." - ) if isinstance(intermediate_events_data, list): intermediate_events = [] for event in intermediate_events_data: diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py index acbeda2afd..322d3aff71 100644 --- a/vertexai/_genai/_evals_metric_handlers.py +++ b/vertexai/_genai/_evals_metric_handlers.py @@ -854,8 +854,9 @@ def _eval_case_to_agent_data( return None tools = None developer_instruction = None - events = None agent_config = None + tool_declarations = [] + event_contents = [] if eval_case.agent_info: agent_info = eval_case.agent_info @@ -865,7 +866,8 @@ def _eval_case_to_agent_data( ) if agent_info.tool_declarations: tool_declarations = agent_info.tool_declarations - tools = types.evals.Tools(tool=tool_declarations) + tools = types.evals.Tools(tool=tool_declarations) + if tools or developer_instruction: agent_config = types.evals.AgentConfig( tools=tools, @@ -878,19 +880,12 @@ def _eval_case_to_agent_data( for event in eval_case.intermediate_events if event.content ] - if event_contents: - events = types.evals.Events(event=event_contents) + events = types.evals.Events(event=event_contents) - if events: - return types.evals.AgentData( - agent_config=agent_config, - events=events, - ) - else: - return types.evals.AgentData( - agent_config=agent_config, - events_text="", - ) + return types.evals.AgentData( + agent_config=agent_config, + events=events, + ) def _build_request_payload( self, eval_case: types.EvalCase, response_index: int diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py index 0436d893ae..f5e19d430a 100644 --- a/vertexai/_genai/_evals_visualization.py +++ b/vertexai/_genai/_evals_visualization.py @@ -280,7 +280,7 @@ def _get_evaluation_html(eval_result_json: str) -> str: // If we have agent info, render as trace if(agentInfo) {{ - let traceHtml = `
🏃agent_run
`; + let traceHtml = `
🤖agent_run
`; eventsArray.forEach(event => {{ if (event.content && event.content.parts && event.content.parts.length > 0) {{ event.content.parts.forEach(part => {{