Skip to content

Commit 609412d

Browse files
kmuehlbauer and jhamman authored
Use zarr-fixture to prevent thread leakage errors (#9967)
* Use zarr-fixture to prevent thread leakage errors * Apply suggestions from code review Co-authored-by: Joe Hamman <[email protected]> * Add whats-new.rst entry * Explicitly add pyarrow to windows builds, as importing dask.dataframe (dask>=2025.1.0) raises ImportError when missing. --------- Co-authored-by: Joe Hamman <[email protected]>
1 parent f2e9f86 commit 609412d

File tree

2 files changed

+49
-18
lines changed

2 files changed

+49
-18
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ Bug fixes
6969
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
7070
- Remove dask-expr from CI runs, add "pyarrow" dask dependency to windows CI runs, fix related tests (:issue:`9962`, :pull:`9971`).
7171
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
72+
- Use zarr-fixture to prevent thread leakage errors (:pull:`9967`).
73+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
7274

7375
Documentation
7476
~~~~~~~~~~~~~

xarray/tests/test_distributed.py

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
from distributed.client import futures_of
2222
from distributed.utils_test import ( # noqa: F401
2323
cleanup,
24+
client,
2425
cluster,
26+
cluster_fixture,
2527
gen_cluster,
2628
loop,
2729
loop_in_thread,
@@ -46,6 +48,7 @@
4648
from xarray.tests.test_dataset import create_test_data
4749

4850
loop = loop # loop is an imported fixture, which flake8 has issues ack-ing
51+
client = client # client is an imported fixture, which flake8 has issues ack-ing
4952

5053

5154
@pytest.fixture
@@ -214,35 +217,61 @@ def test_dask_distributed_read_netcdf_integration_test(
214217
assert_allclose(original, computed)
215218

216219

220+
# fixture vendored from dask
# heads-up, this is using quite private zarr API
# https://github.com/dask/dask/blob/e04734b4d8959ba259801f2e2a490cb4ee8d891f/dask/tests/test_distributed.py#L338-L358
@pytest.fixture(scope="function")
def zarr(client):
    """Yield the ``zarr`` module with its private IO loop/executor pre-created.

    Depends on the distributed ``client`` fixture so zarr's background
    thread is created while the client is alive, and torn down again in
    ``finally`` — this keeps distributed's leak checker from flagging the
    lazily-created zarr IO thread as a leaked thread.
    """
    zarr_lib = pytest.importorskip("zarr")
    # Zarr-Python 3 lazily allocates a dedicated thread/IO loop
    # to execute async tasks. To avoid having this thread
    # be picked up as a "leaked thread", we manually trigger its
    # creation before using zarr
    try:
        _ = zarr_lib.core.sync._get_loop()
        _ = zarr_lib.core.sync._get_executor()
        yield zarr_lib
    except AttributeError:
        # Older zarr (or a changed private API) has no sync helpers to
        # pre-create; just hand the module through unchanged.
        yield zarr_lib
    finally:
        # Zarr-Python 3 lazily allocates an IO thread, a thread pool executor,
        # and an IO loop. Here we clean up these resources to avoid leaking
        # threads. In normal operations, this is done by an atexit handler
        # when Zarr is shutting down.
        try:
            zarr_lib.core.sync.cleanup_resources()
        except AttributeError:
            pass
245+
246+
217247
@requires_zarr
218248
@pytest.mark.parametrize("consolidated", [True, False])
219249
@pytest.mark.parametrize("compute", [True, False])
220250
def test_dask_distributed_zarr_integration_test(
221-
loop, consolidated: bool, compute: bool
251+
client,
252+
zarr,
253+
consolidated: bool,
254+
compute: bool,
222255
) -> None:
223256
if consolidated:
224257
write_kwargs: dict[str, Any] = {"consolidated": True}
225258
read_kwargs: dict[str, Any] = {"backend_kwargs": {"consolidated": True}}
226259
else:
227260
write_kwargs = read_kwargs = {}
228261
chunks = {"dim1": 4, "dim2": 3, "dim3": 5}
229-
with cluster() as (s, [a, b]):
230-
with Client(s["address"], loop=loop):
231-
original = create_test_data().chunk(chunks)
232-
with create_tmp_file(
233-
allow_cleanup_failure=ON_WINDOWS, suffix=".zarrc"
234-
) as filename:
235-
maybe_futures = original.to_zarr( # type: ignore[call-overload] #mypy bug?
236-
filename, compute=compute, **write_kwargs
237-
)
238-
if not compute:
239-
maybe_futures.compute()
240-
with xr.open_dataset(
241-
filename, chunks="auto", engine="zarr", **read_kwargs
242-
) as restored:
243-
assert isinstance(restored.var1.data, da.Array)
244-
computed = restored.compute()
245-
assert_allclose(original, computed)
262+
original = create_test_data().chunk(chunks)
263+
with create_tmp_file(allow_cleanup_failure=ON_WINDOWS, suffix=".zarrc") as filename:
264+
maybe_futures = original.to_zarr( # type: ignore[call-overload] #mypy bug?
265+
filename, compute=compute, **write_kwargs
266+
)
267+
if not compute:
268+
maybe_futures.compute()
269+
with xr.open_dataset(
270+
filename, chunks="auto", engine="zarr", **read_kwargs
271+
) as restored:
272+
assert isinstance(restored.var1.data, da.Array)
273+
computed = restored.compute()
274+
assert_allclose(original, computed)
246275

247276

248277
@gen_cluster(client=True)

0 commit comments

Comments (0)