Skip to content

Commit 2ed6d57

Browse files
shoyer and dcherian authored
Fix for h5py deepcopy issues (#4426)
* Potential fix for h5py deepcopy issues
* lint
* Add unit test
* blacker than the blackest black

Co-authored-by: dcherian <[email protected]>
1 parent b2c1550 commit 2ed6d57

File tree

2 files changed: +46 -3 lines changed

xarray/core/indexing.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -664,6 +664,12 @@ def __setitem__(self, key, value):
664664
self._ensure_copied()
665665
self.array[key] = value
666666

667+
def __deepcopy__(self, memo):
668+
# CopyOnWriteArray is used to wrap backend array objects, which might
669+
# point to files on disk, so we can't rely on the default deepcopy
670+
# implementation.
671+
return type(self)(self.array)
672+
667673

668674
class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin):
669675
__slots__ = ("array",)

xarray/tests/test_backends.py

Lines changed: 40 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1441,7 +1441,10 @@ def test_autoclose_future_warning(self):
14411441
with self.open(tmp_file, autoclose=True) as actual:
14421442
assert_identical(data, actual)
14431443

1444-
def test_already_open_dataset(self):
1444+
1445+
@requires_netCDF4
1446+
class TestNetCDF4AlreadyOpen:
1447+
def test_base_case(self):
14451448
with create_tmp_file() as tmp_file:
14461449
with nc4.Dataset(tmp_file, mode="w") as nc:
14471450
v = nc.createVariable("x", "int")
@@ -1453,7 +1456,7 @@ def test_already_open_dataset(self):
14531456
expected = Dataset({"x": ((), 42)})
14541457
assert_identical(expected, ds)
14551458

1456-
def test_already_open_dataset_group(self):
1459+
def test_group(self):
14571460
with create_tmp_file() as tmp_file:
14581461
with nc4.Dataset(tmp_file, mode="w") as nc:
14591462
group = nc.createGroup("g")
@@ -1476,6 +1479,21 @@ def test_already_open_dataset_group(self):
14761479
with pytest.raises(ValueError, match="must supply a root"):
14771480
backends.NetCDF4DataStore(nc.groups["g"], group="g")
14781481

1482+
def test_deepcopy(self):
1483+
# regression test for https://github.com/pydata/xarray/issues/4425
1484+
with create_tmp_file() as tmp_file:
1485+
with nc4.Dataset(tmp_file, mode="w") as nc:
1486+
nc.createDimension("x", 10)
1487+
v = nc.createVariable("y", np.int32, ("x",))
1488+
v[:] = np.arange(10)
1489+
1490+
h5 = nc4.Dataset(tmp_file, mode="r")
1491+
store = backends.NetCDF4DataStore(h5)
1492+
with open_dataset(store) as ds:
1493+
copied = ds.copy(deep=True)
1494+
expected = Dataset({"y": ("x", np.arange(10))})
1495+
assert_identical(expected, copied)
1496+
14791497

14801498
@requires_netCDF4
14811499
@requires_dask
@@ -2422,7 +2440,10 @@ def test_dump_encodings_h5py(self):
24222440
assert actual.x.encoding["compression"] == "lzf"
24232441
assert actual.x.encoding["compression_opts"] is None
24242442

2425-
def test_already_open_dataset_group(self):
2443+
2444+
@requires_h5netcdf
2445+
class TestH5NetCDFAlreadyOpen:
2446+
def test_open_dataset_group(self):
24262447
import h5netcdf
24272448

24282449
with create_tmp_file() as tmp_file:
@@ -2443,6 +2464,22 @@ def test_already_open_dataset_group(self):
24432464
expected = Dataset({"x": ((), 42)})
24442465
assert_identical(expected, ds)
24452466

2467+
def test_deepcopy(self):
2468+
import h5netcdf
2469+
2470+
with create_tmp_file() as tmp_file:
2471+
with nc4.Dataset(tmp_file, mode="w") as nc:
2472+
nc.createDimension("x", 10)
2473+
v = nc.createVariable("y", np.int32, ("x",))
2474+
v[:] = np.arange(10)
2475+
2476+
h5 = h5netcdf.File(tmp_file, mode="r")
2477+
store = backends.H5NetCDFStore(h5)
2478+
with open_dataset(store) as ds:
2479+
copied = ds.copy(deep=True)
2480+
expected = Dataset({"y": ("x", np.arange(10))})
2481+
assert_identical(expected, copied)
2482+
24462483

24472484
@requires_h5netcdf
24482485
class TestH5NetCDFFileObject(TestH5NetCDFData):

0 commit comments

Comments
 (0)