Skip to content

Commit ed8e218

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add support for loading evaluation datasets from Observability GCS sources
PiperOrigin-RevId: 792182776
1 parent 0870512 commit ed8e218

File tree

2 files changed

+257
-0
lines changed

2 files changed

+257
-0
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3599,3 +3599,169 @@ def test_execute_evaluation_adds_creation_timestamp(
35993599

36003600
assert result.metadata is not None
36013601
assert result.metadata.creation_timestamp == mock_now
3602+
3603+
3604+
class TestEvaluationDataset:
    """Contains set of tests for the EvaluationDataset class methods."""

    @staticmethod
    def _stub_gcs_reads(mock_gcs_utils, contents):
        """Makes the patched GcsUtils serve `contents[src]` for each read.

        Args:
          mock_gcs_utils: The mock that replaces `_evals_utils.GcsUtils`.
          contents: Mapping from GCS source string to the text to return.
            Sources that are not in the mapping read as the empty string.

        Returns:
          The `read_file_contents` mock, so callers can assert on its calls.
        """
        reader = mock_gcs_utils.return_value.read_file_contents
        reader.side_effect = lambda src: contents.get(src, "")
        return reader

    @mock.patch.object(_evals_utils, "GcsUtils")
    def test_load_from_observability_eval_cases(self, mock_gcs_utils):
        """Tests that load_from_observability_eval_cases reads data from GCS."""
        reader = self._stub_gcs_reads(
            mock_gcs_utils,
            {
                "gs://project/input.json": "input",
                "gs://project/output.json": "output",
                "gs://project/system_instruction.json": "system_instruction",
            },
        )

        eval_cases = [
            vertexai_genai_types.ObservabilityEvalCase(
                input_src="gs://project/input.json",
                output_src="gs://project/output.json",
                system_instruction_src="gs://project/system_instruction.json",
            )
        ]
        result = (
            vertexai_genai_types.EvaluationDataset.load_from_observability_eval_cases(
                eval_cases
            )
        )

        reader.assert_has_calls(
            [
                mock.call("gs://project/input.json"),
                mock.call("gs://project/output.json"),
                mock.call("gs://project/system_instruction.json"),
            ],
            any_order=True,
        )
        # assert_has_calls tolerates extra calls; pin the exact number of reads.
        assert reader.call_count == 3
        assert result.eval_dataset_df is not None
        pd.testing.assert_frame_equal(
            result.eval_dataset_df,
            pd.DataFrame(
                {
                    "format": ["observability"],
                    "request": ["input"],
                    "response": ["output"],
                    "system_instruction": ["system_instruction"],
                }
            ),
        )

    @mock.patch.object(_evals_utils, "GcsUtils")
    def test_load_from_observability_eval_cases_no_system_instruction(
        self, mock_gcs_utils
    ):
        """Tests load_from_observability_eval_cases works without system_instruction."""
        # The system instruction file stays readable on purpose: if the loader
        # fetched it anyway, the frame comparison below would fail as well.
        reader = self._stub_gcs_reads(
            mock_gcs_utils,
            {
                "gs://project/input.json": "input",
                "gs://project/output.json": "output",
                "gs://project/system_instruction.json": "system_instruction",
            },
        )

        eval_cases = [
            vertexai_genai_types.ObservabilityEvalCase(
                input_src="gs://project/input.json",
                output_src="gs://project/output.json",
            )
        ]
        result = (
            vertexai_genai_types.EvaluationDataset.load_from_observability_eval_cases(
                eval_cases
            )
        )

        reader.assert_has_calls(
            [
                mock.call("gs://project/input.json"),
                mock.call("gs://project/output.json"),
            ],
            any_order=True,
        )
        # Exactly the input and output are read; no system instruction fetch.
        assert reader.call_count == 2
        assert (
            mock.call("gs://project/system_instruction.json")
            not in reader.call_args_list
        )
        assert result.eval_dataset_df is not None
        pd.testing.assert_frame_equal(
            result.eval_dataset_df,
            pd.DataFrame(
                {
                    "format": ["observability"],
                    "request": ["input"],
                    "response": ["output"],
                    "system_instruction": [""],
                }
            ),
        )

    @mock.patch.object(_evals_utils, "GcsUtils")
    def test_load_from_observability_eval_cases_multiple_cases(self, mock_gcs_utils):
        """Test load_from_observability_eval_cases can handle multiple cases."""
        reader = self._stub_gcs_reads(
            mock_gcs_utils,
            {
                "gs://project/input_1.json": "input_1",
                "gs://project/input_2.json": "input_2",
                "gs://project/output_1.json": "output_1",
                "gs://project/output_2.json": "output_2",
                "gs://project/system_instruction_1.json": "system_instruction_1",
                "gs://project/system_instruction_2.json": "system_instruction_2",
            },
        )

        eval_cases = [
            vertexai_genai_types.ObservabilityEvalCase(
                input_src="gs://project/input_1.json",
                output_src="gs://project/output_1.json",
                system_instruction_src="gs://project/system_instruction_1.json",
            ),
            vertexai_genai_types.ObservabilityEvalCase(
                input_src="gs://project/input_2.json",
                output_src="gs://project/output_2.json",
                system_instruction_src="gs://project/system_instruction_2.json",
            ),
        ]
        result = (
            vertexai_genai_types.EvaluationDataset.load_from_observability_eval_cases(
                eval_cases
            )
        )

        reader.assert_has_calls(
            [
                mock.call("gs://project/input_1.json"),
                mock.call("gs://project/output_1.json"),
                mock.call("gs://project/system_instruction_1.json"),
                mock.call("gs://project/input_2.json"),
                mock.call("gs://project/output_2.json"),
                mock.call("gs://project/system_instruction_2.json"),
            ],
            any_order=True,
        )
        # Three reads per case, nothing more.
        assert reader.call_count == 6
        assert result.eval_dataset_df is not None
        pd.testing.assert_frame_equal(
            result.eval_dataset_df,
            pd.DataFrame(
                {
                    "format": ["observability", "observability"],
                    "request": ["input_1", "input_2"],
                    "response": ["output_1", "output_2"],
                    "system_instruction": [
                        "system_instruction_1",
                        "system_instruction_2",
                    ],
                }
            ),
        )

vertexai/_genai/types.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7085,6 +7085,58 @@ def _check_pandas_installed(cls, data: Any) -> Any:
70857085
)
70867086
return data
70877087

7088+
@classmethod
def load_from_observability_eval_cases(
    cls, cases: list["ObservabilityEvalCase"]
) -> "EvaluationDataset":
    """Fetches GenAI Observability data from GCS and parses into a DataFrame.

    Each case contributes one row. The input and output sources are always
    read; the system instruction source is read only when it is set, and
    the row holds an empty string otherwise.

    Args:
      cases: The observability eval cases to load. A `GcsUtils` helper is
        created per case from `case.api_client._api_client` when the case
        carries an API client, and from `None` otherwise.

    Returns:
      An `EvaluationDataset` whose `eval_dataset_df` has the columns
      `format` (always "observability"), `request`, `response` and
      `system_instruction`, with one row per case in input order.

    Raises:
      ImportError: If pandas is not installed.
    """
    # Guard only the pandas import. Wrapping the whole body would relabel
    # any other ImportError (for example from the `_evals_utils` import)
    # as a missing-pandas error.
    try:
        import pandas as pd
    except ImportError as e:
        raise ImportError("Pandas DataFrame library is required.") from e

    from . import _evals_utils

    formats = []
    requests = []
    responses = []
    system_instructions = []

    for case in cases:
        gcs_utils = _evals_utils.GcsUtils(
            case.api_client._api_client if case.api_client else None
        )

        # Associate "observability" data format for given sources
        formats.append("observability")

        # NOTE(review): input_src and output_src are passed to
        # read_file_contents without a None check -- confirm that helper
        # rejects a missing source cleanly.
        requests.append(gcs_utils.read_file_contents(case.input_src))
        responses.append(gcs_utils.read_file_contents(case.output_src))

        # The system instruction is optional; a missing source yields "".
        system_instruction_data = ""
        if case.system_instruction_src is not None:
            system_instruction_data = gcs_utils.read_file_contents(
                case.system_instruction_src
            )
        system_instructions.append(system_instruction_data)

    eval_dataset_df = pd.DataFrame(
        {
            "format": formats,
            "request": requests,
            "response": responses,
            "system_instruction": system_instructions,
        }
    )

    return EvaluationDataset(eval_dataset_df=eval_dataset_df)
7139+
70887140
def show(self) -> None:
70897141
"""Shows the evaluation dataset."""
70907142
from . import _evals_visualization
@@ -7559,6 +7611,45 @@ class EvaluateDatasetOperationDict(TypedDict, total=False):
75597611
]
75607612

75617613

7614+
class ObservabilityEvalCase(_common.BaseModel):
    """A single evaluation case instance for data stored in GCP Observability."""

    # GCS location of the request (input) content.
    input_src: Optional[str] = Field(
        description=(
            "String containing the GCS reference to the GenAI"
            " input content."
        ),
        default=None,
    )
    # GCS location of the response (output) content.
    output_src: Optional[str] = Field(
        description=(
            "String containing the GCS reference to the GenAI"
            " response content."
        ),
        default=None,
    )
    # GCS location of the system instruction; may be left unset.
    system_instruction_src: Optional[str] = Field(
        description=(
            "An optional string containing the GCS reference to the GenAI"
            " system instruction."
        ),
        default=None,
    )
    # Client handle carried alongside the case.
    api_client: Optional[Any] = Field(
        description="The underlying API client.",
        default=None,
    )
7632+
7633+
7634+
class ObservabilityEvalCaseDict(TypedDict, total=False):
    """A single evaluation case instance for data stored in GCP Observability."""

    input_src: Optional[str]
    """GCS reference (as a string) to the GenAI input content."""

    output_src: Optional[str]
    """GCS reference (as a string) to the GenAI response content."""

    system_instruction_src: Optional[str]
    """Optional GCS reference (as a string) to the GenAI system instruction."""

    api_client: Optional[Any]
    """The underlying API client."""
7648+
7649+
7650+
# Accepts either the ObservabilityEvalCase model or its TypedDict counterpart.
ObservabilityEvalCaseOrDict = Union[ObservabilityEvalCase, ObservabilityEvalCaseDict]
7651+
7652+
75627653
class RubricGroup(_common.BaseModel):
75637654
"""A group of rubrics, used for grouping rubrics based on a metric or a version."""
75647655

0 commit comments

Comments
 (0)