
Commit 65cb6d6

ankursharmas authored and copybara-github committed
fix: Check that mean_score is a valid float value
In some cases, Vertex AI evaluation returns NaN values for the metrics. That case was not handled correctly.

PiperOrigin-RevId: 785514456
1 parent fc85348 commit 65cb6d6
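
For context on why the guard in this change is needed: NaN is a regular float that passes a plain truthiness check, so `if eval_result.summary_metrics:` alone cannot catch it, and None is not a float at all. A minimal sketch (not part of the commit) illustrating this:

import math

nan_score = float("nan")

# NaN is truthy, so a bare `if value:` check lets it through.
print(bool(nan_score))         # True
# NaN never compares equal to itself, so equality checks are unreliable.
print(nan_score == nan_score)  # False
# The explicit guards used by the fix:
print(isinstance(nan_score, float) and not math.isnan(nan_score))  # False
print(isinstance(None, float))                                     # False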

File tree

src/google/adk/evaluation/vertex_ai_eval_facade.py
tests/unittests/evaluation/test_vertex_ai_eval_facade.py

2 files changed: 21 additions, 3 deletions


src/google/adk/evaluation/vertex_ai_eval_facade.py

Lines changed: 7 additions & 1 deletion
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import math
 import os
 from typing import Optional
 
@@ -112,7 +113,12 @@ def _get_text(self, content: Optional[genai_types.Content]) -> str:
     return ""
 
   def _get_score(self, eval_result) -> Optional[float]:
-    if eval_result and eval_result.summary_metrics:
+    if (
+        eval_result
+        and eval_result.summary_metrics
+        and isinstance(eval_result.summary_metrics[0].mean_score, float)
+        and not math.isnan(eval_result.summary_metrics[0].mean_score)
+    ):
       return eval_result.summary_metrics[0].mean_score
 
     return None
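
As a standalone illustration of the patched check, the guarded lookup behaves as follows. This is a minimal sketch; FakeMetric and FakeResult are hypothetical stand-ins for the Vertex AI result types, not the library's classes:

import math
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class FakeMetric:
  # Hypothetical stand-in for an aggregated metric result.
  mean_score: Optional[float] = None


@dataclass
class FakeResult:
  # Hypothetical stand-in for an evaluation result.
  summary_metrics: List[FakeMetric] = field(default_factory=list)


def get_score(eval_result) -> Optional[float]:
  # Mirrors the guarded lookup: require a real float that is not NaN.
  if (
      eval_result
      and eval_result.summary_metrics
      and isinstance(eval_result.summary_metrics[0].mean_score, float)
      and not math.isnan(eval_result.summary_metrics[0].mean_score)
  ):
    return eval_result.summary_metrics[0].mean_score
  return None


assert get_score(FakeResult()) is None
assert get_score(FakeResult([FakeMetric(mean_score=None)])) is None
assert get_score(FakeResult([FakeMetric(mean_score=float("nan"))])) is None
assert get_score(FakeResult([FakeMetric(mean_score=0.75)])) == 0.75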

tests/unittests/evaluation/test_vertex_ai_eval_facade.py

Lines changed: 14 additions & 2 deletions
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """Tests for the Response Evaluator."""
+import math
 import random
 from unittest.mock import patch
 
@@ -122,7 +123,18 @@ def test_evaluate_invocations_metric_failed(self, mock_perform_eval):
         vertexai_types.PrebuiltMetric.COHERENCE.name
     ]
 
-  def test_evaluate_invocations_metric_no_score(self, mock_perform_eval):
+  @pytest.mark.parametrize(
+      "summary_metric_with_no_score",
+      [
+          ([]),
+          ([vertexai_types.AggregatedMetricResult(mean_score=float("nan"))]),
+          ([vertexai_types.AggregatedMetricResult(mean_score=None)]),
+          ([vertexai_types.AggregatedMetricResult(mean_score=math.nan)]),
+      ],
+  )
+  def test_evaluate_invocations_metric_no_score(
+      self, mock_perform_eval, summary_metric_with_no_score
+  ):
     """Test evaluate_invocations function for a metric."""
     actual_invocations = [
         Invocation(
@@ -151,7 +163,7 @@ def test_evaluate_invocations_metric_no_score(self, mock_perform_eval):
     )
     # Mock the return value of _perform_eval
     mock_perform_eval.return_value = vertexai_types.EvaluationResult(
-        summary_metrics=[],
+        summary_metrics=summary_metric_with_no_score,
         eval_case_results=[],
     )
 
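For readers unfamiliar with the decorator used above: pytest.mark.parametrize expands one test into a separate case per entry, so the empty-list, NaN, and None variants all run through the same no-score assertion. A minimal sketch of that mechanism (hypothetical test, not part of the suite):

import math

import pytest


@pytest.mark.parametrize(
    "bad_score",
    [None, float("nan"), math.nan],
)
def test_bad_scores_are_rejected(bad_score):
  # Collection expands this into three test cases, one per parameter value.
  is_valid = isinstance(bad_score, float) and not math.isnan(bad_score)
  assert not is_valid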