Skip to content

Commit feb3ff4

Browse files
authored
fix: make ExecutionMetrics stats tracking more robust to missing stats (#1977)
1 parent afc1242 commit feb3ff4

File tree

3 files changed

+256
-7
lines changed

3 files changed

+256
-7
lines changed

GEMINI.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ We use `nox` to instrument our tests.
4545
nox -r -s lint
4646
```
4747

48+
- When writing tests, use the idiomatic "pytest" style.
49+
4850
## Documentation
4951

5052
If a method or property is implementing the same interface as a third-party

bigframes/session/metrics.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ def count_job_stats(
4242
assert row_iterator is not None
4343

4444
# TODO(tswast): Pass None after making benchmark publishing robust to missing data.
45-
bytes_processed = getattr(row_iterator, "total_bytes_processed", 0)
46-
query_char_count = len(getattr(row_iterator, "query", ""))
47-
slot_millis = getattr(row_iterator, "slot_millis", 0)
45+
bytes_processed = getattr(row_iterator, "total_bytes_processed", 0) or 0
46+
query_char_count = len(getattr(row_iterator, "query", "") or "")
47+
slot_millis = getattr(row_iterator, "slot_millis", 0) or 0
4848
exec_seconds = 0.0
4949

5050
self.execution_count += 1
@@ -63,10 +63,10 @@ def count_job_stats(
6363
elif (stats := get_performance_stats(query_job)) is not None:
6464
query_char_count, bytes_processed, slot_millis, exec_seconds = stats
6565
self.execution_count += 1
66-
self.query_char_count += query_char_count
67-
self.bytes_processed += bytes_processed
68-
self.slot_millis += slot_millis
69-
self.execution_secs += exec_seconds
66+
self.query_char_count += query_char_count or 0
67+
self.bytes_processed += bytes_processed or 0
68+
self.slot_millis += slot_millis or 0
69+
self.execution_secs += exec_seconds or 0
7070
write_stats_to_disk(
7171
query_char_count=query_char_count,
7272
bytes_processed=bytes_processed,

tests/unit/session/test_metrics.py

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
import unittest.mock
18+
19+
import google.cloud.bigquery as bigquery
20+
import pytest
21+
22+
import bigframes.session.metrics as metrics
23+
24+
NOW = datetime.datetime.now(datetime.timezone.utc)
25+
26+
27+
def test_count_job_stats_with_row_iterator():
28+
row_iterator = unittest.mock.create_autospec(
29+
bigquery.table.RowIterator, instance=True
30+
)
31+
row_iterator.total_bytes_processed = 1024
32+
row_iterator.query = "SELECT * FROM table"
33+
row_iterator.slot_millis = 1234
34+
execution_metrics = metrics.ExecutionMetrics()
35+
execution_metrics.count_job_stats(row_iterator=row_iterator)
36+
37+
assert execution_metrics.execution_count == 1
38+
assert execution_metrics.bytes_processed == 1024
39+
assert execution_metrics.query_char_count == 19
40+
assert execution_metrics.slot_millis == 1234
41+
42+
43+
def test_count_job_stats_with_row_iterator_missing_stats():
44+
row_iterator = unittest.mock.create_autospec(
45+
bigquery.table.RowIterator, instance=True
46+
)
47+
# Simulate properties not being present on the object
48+
del row_iterator.total_bytes_processed
49+
del row_iterator.query
50+
del row_iterator.slot_millis
51+
execution_metrics = metrics.ExecutionMetrics()
52+
execution_metrics.count_job_stats(row_iterator=row_iterator)
53+
54+
assert execution_metrics.execution_count == 1
55+
assert execution_metrics.bytes_processed == 0
56+
assert execution_metrics.query_char_count == 0
57+
assert execution_metrics.slot_millis == 0
58+
59+
60+
def test_count_job_stats_with_row_iterator_none_stats():
61+
row_iterator = unittest.mock.create_autospec(
62+
bigquery.table.RowIterator, instance=True
63+
)
64+
row_iterator.total_bytes_processed = None
65+
row_iterator.query = None
66+
row_iterator.slot_millis = None
67+
execution_metrics = metrics.ExecutionMetrics()
68+
execution_metrics.count_job_stats(row_iterator=row_iterator)
69+
70+
assert execution_metrics.execution_count == 1
71+
assert execution_metrics.bytes_processed == 0
72+
assert execution_metrics.query_char_count == 0
73+
assert execution_metrics.slot_millis == 0
74+
75+
76+
def test_count_job_stats_with_dry_run():
77+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
78+
query_job.configuration.dry_run = True
79+
query_job.query = "SELECT * FROM table"
80+
execution_metrics = metrics.ExecutionMetrics()
81+
execution_metrics.count_job_stats(query_job=query_job)
82+
83+
# Dry run jobs shouldn't count as "executed"
84+
assert execution_metrics.execution_count == 0
85+
assert execution_metrics.bytes_processed == 0
86+
assert execution_metrics.query_char_count == 0
87+
assert execution_metrics.slot_millis == 0
88+
89+
90+
def test_count_job_stats_with_valid_job():
91+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
92+
query_job.configuration.dry_run = False
93+
query_job.query = "SELECT * FROM table"
94+
query_job.total_bytes_processed = 2048
95+
query_job.slot_millis = 5678
96+
query_job.created = NOW
97+
query_job.ended = NOW + datetime.timedelta(seconds=2)
98+
execution_metrics = metrics.ExecutionMetrics()
99+
execution_metrics.count_job_stats(query_job=query_job)
100+
101+
assert execution_metrics.execution_count == 1
102+
assert execution_metrics.bytes_processed == 2048
103+
assert execution_metrics.query_char_count == 19
104+
assert execution_metrics.slot_millis == 5678
105+
assert execution_metrics.execution_secs == pytest.approx(2.0)
106+
107+
108+
def test_count_job_stats_with_cached_job():
109+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
110+
query_job.configuration.dry_run = False
111+
query_job.query = "SELECT * FROM table"
112+
# Cache hit jobs don't have total_bytes_processed or slot_millis
113+
query_job.total_bytes_processed = None
114+
query_job.slot_millis = None
115+
query_job.created = NOW
116+
query_job.ended = NOW + datetime.timedelta(seconds=1)
117+
execution_metrics = metrics.ExecutionMetrics()
118+
execution_metrics.count_job_stats(query_job=query_job)
119+
120+
assert execution_metrics.execution_count == 1
121+
assert execution_metrics.bytes_processed == 0
122+
assert execution_metrics.query_char_count == 19
123+
assert execution_metrics.slot_millis == 0
124+
assert execution_metrics.execution_secs == pytest.approx(1.0)
125+
126+
127+
def test_count_job_stats_with_unsupported_job():
128+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
129+
query_job.configuration.dry_run = False
130+
query_job.query = "SELECT * FROM table"
131+
# Some jobs, such as scripts, don't have these properties.
132+
query_job.total_bytes_processed = None
133+
query_job.slot_millis = None
134+
query_job.created = None
135+
query_job.ended = None
136+
execution_metrics = metrics.ExecutionMetrics()
137+
execution_metrics.count_job_stats(query_job=query_job)
138+
139+
# Don't count jobs if we can't get performance stats.
140+
assert execution_metrics.execution_count == 0
141+
assert execution_metrics.bytes_processed == 0
142+
assert execution_metrics.query_char_count == 0
143+
assert execution_metrics.slot_millis == 0
144+
assert execution_metrics.execution_secs == pytest.approx(0.0)
145+
146+
147+
def test_get_performance_stats_with_valid_job():
148+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
149+
query_job.configuration.dry_run = False
150+
query_job.query = "SELECT * FROM table"
151+
query_job.total_bytes_processed = 2048
152+
query_job.slot_millis = 5678
153+
query_job.created = NOW
154+
query_job.ended = NOW + datetime.timedelta(seconds=2)
155+
stats = metrics.get_performance_stats(query_job)
156+
assert stats is not None
157+
query_char_count, bytes_processed, slot_millis, exec_seconds = stats
158+
assert query_char_count == 19
159+
assert bytes_processed == 2048
160+
assert slot_millis == 5678
161+
assert exec_seconds == pytest.approx(2.0)
162+
163+
164+
def test_get_performance_stats_with_dry_run():
165+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
166+
query_job.configuration.dry_run = True
167+
stats = metrics.get_performance_stats(query_job)
168+
assert stats is None
169+
170+
171+
def test_get_performance_stats_with_missing_timestamps():
172+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
173+
query_job.configuration.dry_run = False
174+
query_job.created = None
175+
query_job.ended = NOW
176+
stats = metrics.get_performance_stats(query_job)
177+
assert stats is None
178+
179+
query_job.created = NOW
180+
query_job.ended = None
181+
stats = metrics.get_performance_stats(query_job)
182+
assert stats is None
183+
184+
185+
def test_get_performance_stats_with_mocked_types():
186+
query_job = unittest.mock.create_autospec(bigquery.QueryJob, instance=True)
187+
query_job.configuration.dry_run = False
188+
query_job.created = NOW
189+
query_job.ended = NOW
190+
query_job.total_bytes_processed = unittest.mock.Mock()
191+
query_job.slot_millis = 123
192+
stats = metrics.get_performance_stats(query_job)
193+
assert stats is None
194+
195+
query_job.total_bytes_processed = 123
196+
query_job.slot_millis = unittest.mock.Mock()
197+
stats = metrics.get_performance_stats(query_job)
198+
assert stats is None
199+
200+
201+
@pytest.fixture
202+
def mock_environ(monkeypatch):
203+
"""Fixture to mock os.environ."""
204+
monkeypatch.setenv(metrics.LOGGING_NAME_ENV_VAR, "my_test_case")
205+
206+
207+
def test_write_stats_to_disk_writes_files(tmp_path, mock_environ):
208+
os.chdir(tmp_path)
209+
test_name = os.environ[metrics.LOGGING_NAME_ENV_VAR]
210+
metrics.write_stats_to_disk(
211+
query_char_count=100,
212+
bytes_processed=200,
213+
slot_millis=300,
214+
exec_seconds=1.23,
215+
)
216+
217+
slot_file = tmp_path / (test_name + ".slotmillis")
218+
assert slot_file.exists()
219+
with open(slot_file) as f:
220+
assert f.read() == "300\n"
221+
222+
exec_time_file = tmp_path / (test_name + ".bq_exec_time_seconds")
223+
assert exec_time_file.exists()
224+
with open(exec_time_file) as f:
225+
assert f.read() == "1.23\n"
226+
227+
query_char_count_file = tmp_path / (test_name + ".query_char_count")
228+
assert query_char_count_file.exists()
229+
with open(query_char_count_file) as f:
230+
assert f.read() == "100\n"
231+
232+
bytes_file = tmp_path / (test_name + ".bytesprocessed")
233+
assert bytes_file.exists()
234+
with open(bytes_file) as f:
235+
assert f.read() == "200\n"
236+
237+
238+
def test_write_stats_to_disk_no_env_var(tmp_path, monkeypatch):
239+
monkeypatch.delenv(metrics.LOGGING_NAME_ENV_VAR, raising=False)
240+
os.chdir(tmp_path)
241+
metrics.write_stats_to_disk(
242+
query_char_count=100,
243+
bytes_processed=200,
244+
slot_millis=300,
245+
exec_seconds=1.23,
246+
)
247+
assert len(list(tmp_path.iterdir())) == 0

0 commit comments

Comments
 (0)