
Commit 65cb6d6

ankursharmas authored and copybara-github committed
fix: Check that mean_score is a valid float value
In some cases, Vertex AI evaluation returns NaN values for the metrics. That case was not handled correctly.

PiperOrigin-RevId: 785514456
1 parent fc85348 commit 65cb6d6
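
For context on why the guard in this change is needed: NaN is a regular float that passes a plain truthiness check, so `if eval_result.summary_metrics:` alone cannot catch it, and None is not a float at all. A minimal sketch (not part of the commit) illustrating this:

import math

nan_score = float("nan")

# NaN is truthy, so a bare `if value:` check lets it through.
print(bool(nan_score))         # True
# NaN never compares equal to itself, so equality checks are unreliable.
print(nan_score == nan_score)  # False
# The explicit guards used by the fix:
print(isinstance(nan_score, float) and not math.isnan(nan_score))  # False
print(isinstance(None, float))                                     # False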

File tree

src/google/adk/evaluation/vertex_ai_eval_facade.py
tests/unittests/evaluation/test_vertex_ai_eval_facade.py

2 files changed: 21 additions, 3 deletions


src/google/adk/evaluation/vertex_ai_eval_facade.py

Lines changed: 7 additions & 1 deletion
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import math
 import os
 from typing import Optional
 
@@ -112,7 +113,12 @@ def _get_text(self, content: Optional[genai_types.Content]) -> str:
     return ""
 
   def _get_score(self, eval_result) -> Optional[float]:
-    if eval_result and eval_result.summary_metrics:
+    if (
+        eval_result
+        and eval_result.summary_metrics
+        and isinstance(eval_result.summary_metrics[0].mean_score, float)
+        and not math.isnan(eval_result.summary_metrics[0].mean_score)
+    ):
       return eval_result.summary_metrics[0].mean_score
 
     return None
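
As a standalone illustration of the patched check, the guarded lookup behaves as follows. This is a minimal sketch; FakeMetric and FakeResult are hypothetical stand-ins for the Vertex AI result types, not the library's classes:

import math
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class FakeMetric:
  # Hypothetical stand-in for an aggregated metric result.
  mean_score: Optional[float] = None


@dataclass
class FakeResult:
  # Hypothetical stand-in for an evaluation result.
  summary_metrics: List[FakeMetric] = field(default_factory=list)


def get_score(eval_result) -> Optional[float]:
  # Mirrors the guarded lookup: require a real float that is not NaN.
  if (
      eval_result
      and eval_result.summary_metrics
      and isinstance(eval_result.summary_metrics[0].mean_score, float)
      and not math.isnan(eval_result.summary_metrics[0].mean_score)
  ):
    return eval_result.summary_metrics[0].mean_score
  return None


assert get_score(FakeResult()) is None
assert get_score(FakeResult([FakeMetric(mean_score=None)])) is None
assert get_score(FakeResult([FakeMetric(mean_score=float("nan"))])) is None
assert get_score(FakeResult([FakeMetric(mean_score=0.75)])) == 0.75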

tests/unittests/evaluation/test_vertex_ai_eval_facade.py

Lines changed: 14 additions & 2 deletions
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """Tests for the Response Evaluator."""
+import math
 import random
 from unittest.mock import patch
 
@@ -122,7 +123,18 @@ def test_evaluate_invocations_metric_failed(self, mock_perform_eval):
         vertexai_types.PrebuiltMetric.COHERENCE.name
     ]
 
-  def test_evaluate_invocations_metric_no_score(self, mock_perform_eval):
+  @pytest.mark.parametrize(
+      "summary_metric_with_no_score",
+      [
+          ([]),
+          ([vertexai_types.AggregatedMetricResult(mean_score=float("nan"))]),
+          ([vertexai_types.AggregatedMetricResult(mean_score=None)]),
+          ([vertexai_types.AggregatedMetricResult(mean_score=math.nan)]),
+      ],
+  )
+  def test_evaluate_invocations_metric_no_score(
+      self, mock_perform_eval, summary_metric_with_no_score
+  ):
     """Test evaluate_invocations function for a metric."""
     actual_invocations = [
         Invocation(
@@ -151,7 +163,7 @@ def test_evaluate_invocations_metric_no_score(self, mock_perform_eval):
     )
     # Mock the return value of _perform_eval
     mock_perform_eval.return_value = vertexai_types.EvaluationResult(
-        summary_metrics=[],
+        summary_metrics=summary_metric_with_no_score,
         eval_case_results=[],
     )
 
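For readers unfamiliar with the decorator used above: pytest.mark.parametrize expands one test into a separate case per entry, so the empty-list, NaN, and None variants all run through the same no-score assertion. A minimal sketch of that mechanism (hypothetical test, not part of the suite):

import math

import pytest


@pytest.mark.parametrize(
    "bad_score",
    [None, float("nan"), math.nan],
)
def test_bad_scores_are_rejected(bad_score):
  # Collection expands this into three test cases, one per parameter value.
  is_valid = isinstance(bad_score, float) and not math.isnan(bad_score)
  assert not is_valid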