17 changes: 13 additions & 4 deletions tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -19,7 +19,7 @@
from google.genai import types as genai_types
import pytest

GCS_DEST = "gs://lakeyk-test-limited/eval_run_output"
GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
UNIVERSAL_AR_METRIC = types.EvaluationRunMetric(
metric="universal_ar_v1",
metric_config=types.UnifiedMetric(
@@ -51,9 +51,6 @@
# TODO(b/431231205): Re-enable once Unified Metrics are in prod.
# def test_create_eval_run_data_source_evaluation_set(client):
# """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
# client._api_client._http_options.base_url = (
# "https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
# )
# client._api_client._http_options.api_version = "v1beta1"
# tool = genai_types.Tool(
# function_declarations=[
@@ -80,10 +77,12 @@
# LLM_METRIC
# ],
# agent_info=types.AgentInfo(
# agent="project/123/locations/us-central1/reasoningEngines/456",
# name="agent-1",
# instruction="agent-1 instruction",
# tool_declarations=[tool],
# ),
# labels={"label1": "value1"},
# )
# assert isinstance(evaluation_run, types.EvaluationRun)
# assert evaluation_run.display_name == "test4"
@@ -108,6 +107,10 @@
# tools=[tool],
# )
# )
# assert evaluation_run.labels == {
# "vertex-ai-evaluation-agent-engine-id": "456",
# "label1": "value1",
# }
# assert evaluation_run.error is None
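
The merged labels asserted just above can be reproduced standalone. A minimal sketch using the same (illustrative) resource name and label values as this disabled test:

# The reserved key is filled from the trailing reasoning-engine ID of the
# agent resource name; user-supplied labels are kept alongside it.
agent = "project/123/locations/us-central1/reasoningEngines/456"
merged = {"label1": "value1"}
merged["vertex-ai-evaluation-agent-engine-id"] = agent.split("reasoningEngines/")[-1]
assert merged == {
    "vertex-ai-evaluation-agent-engine-id": "456",
    "label1": "value1",
}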


@@ -127,6 +130,7 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
},
)
),
labels={"label1": "value1"},
dest=GCS_DEST,
)
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -150,6 +154,9 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
),
)
assert evaluation_run.inference_configs is None
assert evaluation_run.labels == {
"label1": "value1",
}
assert evaluation_run.error is None


@@ -289,6 +296,8 @@ async def test_create_eval_run_async(client):
assert evaluation_run.error is None
assert evaluation_run.inference_configs is None
assert evaluation_run.error is None
assert evaluation_run.labels is None
assert evaluation_run.error is None


pytestmark = pytest_helper.setup(
36 changes: 36 additions & 0 deletions vertexai/_genai/evals.py
@@ -77,6 +77,9 @@ def _CreateEvaluationRunParameters_to_vertex(
if getv(from_object, ["evaluation_config"]) is not None:
setv(to_object, ["evaluationConfig"], getv(from_object, ["evaluation_config"]))

if getv(from_object, ["labels"]) is not None:
setv(to_object, ["labels"], getv(from_object, ["labels"]))

if getv(from_object, ["config"]) is not None:
setv(to_object, ["config"], getv(from_object, ["config"]))

@@ -236,6 +239,9 @@ def _EvaluationRun_from_vertex(
if getv(from_object, ["inferenceConfigs"]) is not None:
setv(to_object, ["inference_configs"], getv(from_object, ["inferenceConfigs"]))

if getv(from_object, ["labels"]) is not None:
setv(to_object, ["labels"], getv(from_object, ["labels"]))

return to_object
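
Both converters copy `labels` through without renaming, between the snake_case request parameters and the camelCase Vertex payload. A minimal illustration with plain dicts, assuming getv/setv act as straightforward nested-key accessors:

# Request side: labels move from the parameters object to the Vertex payload as-is.
params = {"display_name": "test4", "labels": {"label1": "value1"}}
vertex_payload = {}
if params.get("labels") is not None:
    vertex_payload["labels"] = params["labels"]

# Response side: _EvaluationRun_from_vertex performs the same verbatim copy back.
assert vertex_payload["labels"] == {"label1": "value1"}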


@@ -464,6 +470,7 @@ def _create_evaluation_run(
display_name: Optional[str] = None,
data_source: types.EvaluationRunDataSourceOrDict,
evaluation_config: types.EvaluationRunConfigOrDict,
labels: Optional[dict[str, str]] = None,
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
inference_configs: Optional[
dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -478,6 +485,7 @@
display_name=display_name,
data_source=data_source,
evaluation_config=evaluation_config,
labels=labels,
config=config,
inference_configs=inference_configs,
)
@@ -1316,6 +1324,7 @@ def create_evaluation_run(
list[types.EvaluationRunMetricOrDict]
] = None, # TODO: Make required unified metrics available in prod.
agent_info: Optional[types.AgentInfo] = None,
labels: Optional[dict[str, str]] = None,
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
) -> types.EvaluationRun:
"""Creates an EvaluationRun."""
@@ -1353,13 +1362,25 @@
tools=agent_info.tool_declarations,
)
)
if (
not agent_info.agent
or len(agent_info.agent.split("reasoningEngines/")) != 2
):
raise ValueError(
"agent_info.agent cannot be empty. Please provide a valid reasoning engine resource name in the format of projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}."
)
labels = labels or {}
labels["vertex-ai-evaluation-agent-engine-id"] = agent_info.agent.split(
"reasoningEngines/"
)[-1]

return self._create_evaluation_run( # type: ignore[no-any-return]
name=name,
display_name=display_name,
data_source=dataset,
evaluation_config=evaluation_config,
inference_configs=inference_configs,
labels=labels,
config=config,
)
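
The guard above accepts exactly one "reasoningEngines/" segment in agent_info.agent and rejects everything else before deriving the label value. A self-contained sketch of that behavior; the helper is illustrative, not part of the library:

def _engine_id(agent: str) -> str:
    # Mirrors the condition in create_evaluation_run: the resource name must
    # contain exactly one "reasoningEngines/" separator.
    if not agent or len(agent.split("reasoningEngines/")) != 2:
        raise ValueError("invalid reasoning engine resource name")
    return agent.split("reasoningEngines/")[-1]

assert _engine_id("projects/123/locations/us-central1/reasoningEngines/456") == "456"

for bad in ("", "projects/123/locations/us-central1", "reasoningEngines/1/reasoningEngines/2"):
    try:
        _engine_id(bad)
    except ValueError:
        pass  # rejected, as the new ValueError in this hunk would be

The async create_evaluation_run further down applies the identical check before merging the derived ID into labels.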

@@ -1566,6 +1587,7 @@ async def _create_evaluation_run(
display_name: Optional[str] = None,
data_source: types.EvaluationRunDataSourceOrDict,
evaluation_config: types.EvaluationRunConfigOrDict,
labels: Optional[dict[str, str]] = None,
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
inference_configs: Optional[
dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -1580,6 +1602,7 @@
display_name=display_name,
data_source=data_source,
evaluation_config=evaluation_config,
labels=labels,
config=config,
inference_configs=inference_configs,
)
@@ -2121,6 +2144,7 @@ async def create_evaluation_run(
list[types.EvaluationRunMetricOrDict]
] = None, # TODO: Make required unified metrics available in prod.
agent_info: Optional[types.AgentInfo] = None,
labels: Optional[dict[str, str]] = None,
config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
) -> types.EvaluationRun:
"""Creates an EvaluationRun."""
@@ -2158,13 +2182,25 @@ async def create_evaluation_run(
tools=agent_info.tool_declarations,
)
)
if (
not agent_info.agent
or len(agent_info.agent.split("reasoningEngines/")) != 2
):
raise ValueError(
"agent_info.agent cannot be empty. Please provide a valid reasoning engine resource name in the format of projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine}."
)
labels = labels or {}
labels["vertex-ai-evaluation-agent-engine-id"] = agent_info.agent.split(
"reasoningEngines/"
)[-1]

result = await self._create_evaluation_run( # type: ignore[no-any-return]
name=name,
display_name=display_name,
data_source=dataset,
evaluation_config=evaluation_config,
inference_configs=inference_configs,
labels=labels,
config=config,
)

17 changes: 17 additions & 0 deletions vertexai/_genai/types.py
@@ -1220,6 +1220,7 @@ class _CreateEvaluationRunParameters(_common.BaseModel):
evaluation_config: Optional[EvaluationRunConfig] = Field(
default=None, description=""""""
)
labels: Optional[dict[str, str]] = Field(default=None, description="""""")
config: Optional[CreateEvaluationRunConfig] = Field(
default=None, description=""""""
)
@@ -1243,6 +1244,9 @@ class _CreateEvaluationRunParametersDict(TypedDict, total=False):
evaluation_config: Optional[EvaluationRunConfigDict]
""""""

labels: Optional[dict[str, str]]
""""""

config: Optional[CreateEvaluationRunConfigDict]
""""""

@@ -1482,6 +1486,11 @@ class EventDict(TypedDict, total=False):
class AgentInfo(_common.BaseModel):
"""The agent info of an agent, used for agent eval."""

agent: Optional[str] = Field(
default=None,
description="""The agent engine used to run agent. Agent engine resource name in str type, with format
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""",
)
name: Optional[str] = Field(
default=None, description="""Agent name, used as an identifier."""
)
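
A minimal sketch constructing the updated type with the new agent field, assuming the package containing this change is installed and the module is importable under the path shown in the diff; the resource name is illustrative:

from vertexai._genai import types

agent_info = types.AgentInfo(
    agent="projects/123/locations/us-central1/reasoningEngines/456",
    name="agent-1",
    instruction="agent-1 instruction",
)
# The trailing ID is what create_evaluation_run stores under
# the "vertex-ai-evaluation-agent-engine-id" label.
assert agent_info.agent.endswith("/456")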
@@ -1499,6 +1508,10 @@ class AgentInfoDict(TypedDict, total=False):
class AgentInfoDict(TypedDict, total=False):
"""The agent info of an agent, used for agent eval."""

agent: Optional[str]
"""The agent engine used to run agent. Agent engine resource name in str type, with format
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`."""

name: Optional[str]
"""Agent name, used as an identifier."""

@@ -1919,6 +1932,7 @@ class EvaluationRun(_common.BaseModel):
default=None,
description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""",
)
labels: Optional[dict[str, str]] = Field(default=None, description="""""")

# TODO(b/448806531): Remove all the overridden _from_response methods once the
# ticket is resolved and published.
@@ -2003,6 +2017,9 @@ class EvaluationRunDict(TypedDict, total=False):
inference_configs: Optional[dict[str, "EvaluationRunInferenceConfigDict"]]
"""This field is experimental and may change in future versions. The inference configs for the evaluation run."""

labels: Optional[dict[str, str]]
""""""


EvaluationRunOrDict = Union[EvaluationRun, EvaluationRunDict]
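
The TypedDict form accepts the new field as well; a short sketch, assuming display_name is among the existing keys (the tests above assert on it):

from vertexai._genai import types

run_dict: types.EvaluationRunDict = {
    "display_name": "test4",
    "labels": {"label1": "value1"},
}
assert run_dict["labels"] == {"label1": "value1"}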
