
Commit 6552b0e

Merge pull request #864 from davidhassell/zarr
Read Zarr datasets
2 parents ac1f8c2 + fbb6ff7 commit 6552b0e


75 files changed: +1604, -591 lines changed

Changelog.rst

Lines changed: 6 additions & 3 deletions
@@ -1,18 +1,21 @@
-version NEXTVERSION
--------------------
+Version NEXTVERSION
+----------------
 
 **2025-??-??**
 
+* Read Zarr datasets with `cf.read`
+  (https://github.com/NCAS-CMS/cf-python/issues/863)
 * Update CF aggregation keywords
   (https://github.com/NCAS-CMS/cf-python/issues/868)
 * New keyword parameter to `cf.DimensionCoordinate.create_bounds`:
   ``inplace`` (https://github.com/NCAS-CMS/cf-python/issues/855)
 * Set new minimum version of `dask`: ``2025.5.1``
   (https://github.com/NCAS-CMS/cf-python/issues/866)
+* Changed dependency: ``cfdm>=1.12.2.0, <1.12.3.0``
 * Changed dependency: ``dask>=2025.5.1``
 
 ----
-
+
 version 3.17.0
 --------------

README.md

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ The `cf` package uses
 [Dask](https://ncas-cms.github.io/cf-python/performance.html) for all
 of its array manipulation and can:
 
-* read field constructs from netCDF, CDL, PP and UM datasets with a
+* read field constructs from netCDF, CDL, Zarr, PP and UM datasets with a
   choice of netCDF backends,and in local, http, and s3 locations,
 * create new field constructs in memory,
 * write and append field and domain constructs to netCDF datasets on disk,
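
The new Zarr support is exercised through `cf.read` just like the existing formats. A minimal sketch, assuming a hypothetical local Zarr store called "example.zarr" containing CF-compliant data, and assuming that `cf.read` recognises the Zarr format from the store's contents:

    import cf

    # "example.zarr" is a hypothetical directory-style Zarr store; cf.read
    # is assumed to detect the Zarr format automatically, as it does for
    # netCDF and CDL inputs.
    fields = cf.read("example.zarr")
    print(fields)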

cf/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -11,7 +11,7 @@
 The `cf` package uses `dask` for all of its array manipulation and
 can:
 
-* read field constructs from netCDF, CDL, PP and UM datasets,
+* read field constructs from netCDF, CDL, Zarr, PP and UM datasets,
 
 * read field constructs and domain constructs from netCDF, CDL, PP and
   UM datasets with a choice of netCDF backends,
@@ -284,6 +284,7 @@
     RaggedIndexedContiguousArray,
     SubsampledArray,
     UMArray,
+    ZarrArray,
 )
 
 from .aggregate import aggregate, climatology_cells

cf/cfimplementation.py

Lines changed: 13 additions & 9 deletions
@@ -38,6 +38,7 @@
     RaggedIndexedArray,
     RaggedIndexedContiguousArray,
     SubsampledArray,
+    ZarrArray,
 )
 from .functions import CF
 
@@ -49,8 +50,8 @@ class CFImplementation(cfdm.CFDMImplementation):
 
     """
 
-    def nc_set_hdf5_chunksizes(self, data, sizes, override=False):
-        """Set the data HDF5 chunksizes.
+    def nc_set_dataset_chunksizes(self, data, sizes, override=False):
+        """Set the data dataset chunksizes.
 
         .. versionadded:: 3.16.2
 
@@ -60,21 +61,21 @@ def nc_set_hdf5_chunksizes(self, data, sizes, override=False):
                 The data.
 
             sizes: sequence of `int`
-                The new HDF5 chunk sizes.
+                The new dataset chunk sizes.
 
             override: `bool`, optional
-                If True then set the HDF5 chunks sizes even if some
+                If True then set the dataset chunks sizes even if some
                 have already been specified. If False, the default,
-                then only set the HDF5 chunks sizes if some none have
-                already been specified.
+                then only set the dataset chunks sizes if some none
+                have already been specified.
 
         :Returns:
 
             `None`
 
         """
-        if override or not data.nc_hdf5_chunksizes():
-            data.nc_set_hdf5_chunksizes(sizes)
+        if override or not data.nc_dataset_chunksizes():
+            data.nc_set_dataset_chunksizes(sizes)
 
     def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
         """Insert a construct into a field or domain.
@@ -151,6 +152,7 @@ def set_construct(self, parent, construct, axes=None, copy=True, **kwargs):
     RaggedIndexedContiguousArray=RaggedIndexedContiguousArray,
     SubsampledArray=SubsampledArray,
     TiePointIndex=TiePointIndex,
+    ZarrArray=ZarrArray,
 )
 
 
@@ -205,7 +207,9 @@ def implementation():
     'RaggedIndexedArray': cf.data.array.raggedindexedarray.RaggedIndexedArray,
     'RaggedIndexedContiguousArray': cf.data.array.raggedindexedcontiguousarray.RaggedIndexedContiguousArray,
     'SubsampledArray': cf.data.array.subsampledarray.SubsampledArray,
-    'TiePointIndex': cf.tiepointindex.TiePointIndex}
+    'TiePointIndex': cf.tiepointindex.TiePointIndex,
+    'ZarrArray': cf.data.array.zarrarray.ZarrArray,
+    }
 
     """
     return _implementation.copy()
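
The `override` logic above delegates to the renamed dataset-chunk methods on `Data`. A minimal sketch of that behaviour, assuming `nc_set_dataset_chunksizes` accepts a per-dimension sequence of sizes:

    import cf

    d = cf.Data([[1, 2, 3], [4, 5, 6]], "m")
    d.nc_set_dataset_chunksizes([1, 3])  # assumed: per-dimension sequence form
    print(d.nc_dataset_chunksizes())     # (1, 3)

    # override=False behaviour: keep any chunk sizes that already exist
    if not d.nc_dataset_chunksizes():
        d.nc_set_dataset_chunksizes([2, 2])  # skipped, sizes are already set

    print(d.nc_dataset_chunksizes())     # still (1, 3)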

cf/data/array/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -12,3 +12,4 @@
 from .raggedindexedcontiguousarray import RaggedIndexedContiguousArray
 from .subsampledarray import SubsampledArray
 from .umarray import UMArray
+from .zarrarray import ZarrArray

cf/data/array/zarrarray.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+import cfdm
+
+from ...mixin_container import Container
+
+# Uncomment when we can use active storage on Zarr datasets:
+# from .mixin import ActiveStorageMixin
+
+
+class ZarrArray(
+    # Uncomment when we can use active storage on Zarr datasets:
+    # ActiveStorageMixin,
+    Container,
+    cfdm.ZarrArray,
+):
+    """A Zarr array accessed with `zarr`."""

cf/data/data.py

Lines changed: 6 additions & 6 deletions
@@ -5532,7 +5532,7 @@ def outerproduct(self, a, inplace=False, i=False):
         d = _inplace_enabled_define_and_cleanup(self)
 
         shape = d.shape
-        chunksizes0 = d.nc_hdf5_chunksizes()
+        chunksizes0 = d.nc_dataset_chunksizes()
 
         # Cast 'a' as a Data object so that it definitely has sensible
         # Units. We don't mind if the units of 'a' are incompatible
@@ -5563,19 +5563,19 @@
         for a_axis in a._cyclic:
             d.cyclic(ndim + a._axes.index(a_axis))
 
-        # Update the HDF5 chunking strategy
-        chunksizes1 = a.nc_hdf5_chunksizes()
+        # Update the dataset chunking strategy
+        chunksizes1 = a.nc_dataset_chunksizes()
         if chunksizes0 or chunksizes1:
             if isinstance(chunksizes0, tuple):
                 if isinstance(chunksizes1, tuple):
                     chunksizes = chunksizes0 + chunksizes1
                 else:
                     chunksizes = chunksizes0 + a.shape
 
-                d.nc_set_hdf5_chunksizes(chunksizes)
+                d.nc_set_dataset_chunksizes(chunksizes)
             elif isinstance(chunksizes1, tuple):
                 chunksizes = shape + chunksizes1
-                d.nc_set_hdf5_chunksizes(chunksizes)
+                d.nc_set_dataset_chunksizes(chunksizes)
 
         d._update_deterministic(a)
         return d
@@ -6259,7 +6259,7 @@ def reshape(self, *shape, merge_chunks=True, limit=None, inplace=False):
 
         # Clear cyclic axes, as we can't help but lose them in this
         # operation
-        d._cyclic = _empty_set
+        del d._cyclic
 
         return d
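
The chunk-size bookkeeping in `outerproduct` concatenates the operands' per-dimension dataset chunk sizes when both are set. A minimal sketch of the expected behaviour, assuming the per-dimension sequence form of `nc_set_dataset_chunksizes`:

    import cf

    d = cf.Data([[1.0, 2.0], [3.0, 4.0]], "m")
    d.nc_set_dataset_chunksizes((1, 2))

    a = cf.Data([10.0, 20.0, 30.0], "s")
    a.nc_set_dataset_chunksizes((3,))

    e = d.outerproduct(a)
    print(e.shape)                    # (2, 2, 3)
    # Per the logic above, the result's sizes should be the concatenation:
    print(e.nc_dataset_chunksizes())  # expected: (1, 2, 3)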

cf/data/mixin/deprecations.py

Lines changed: 10 additions & 8 deletions
@@ -437,15 +437,17 @@ def dumps(self):
     def HDF_chunks(self, *chunks):
         """Get or set HDF chunk sizes.
 
+        Deprecated at version 3.14.0 and is no longer available. Use
+        the methods `nc_clear_dataset_chunksizes`,
+        `nc_dataset_chunksizes`, and `nc_set_dataset_chunksizes`
+        instead.
+
         The HDF chunk sizes may be used by external code that allows
         `Data` objects to be written to netCDF files.
 
-        Deprecated at version 3.14.0 and is no longer available. Use
-        the methods `nc_clear_hdf5_chunksizes`, `nc_hdf5_chunksizes`,
-        and `nc_set_hdf5_chunksizes` instead.
-
-        .. seealso:: `nc_clear_hdf5_chunksizes`, `nc_hdf5_chunksizes`,
-                     `nc_set_hdf5_chunksizes`
+        .. seealso:: `nc_clear_dataset_chunksizes`,
+                     `nc_dataset_chunksizes`,
+                     `nc_set_dataset_chunksizes`
 
         :Parameters:
 
@@ -506,8 +508,8 @@ def HDF_chunks(self, *chunks):
         _DEPRECATION_ERROR_METHOD(
             self,
             "HDF_chunks",
-            message="Use the methods 'nc_clear_hdf5_chunksizes', "
-            "'nc_hdf5_chunksizes', and 'nc_set_hdf5_chunksizes' "
+            message="Use the methods 'nc_clear_dataset_chunksizes', "
+            "'nc_dataset_chunksizes', and 'nc_set_dataset_chunksizes' "
             "instead.",
            version="3.14.0",
             removed_at="5.0.0",

cf/data/utils.py

Lines changed: 2 additions & 2 deletions
@@ -435,8 +435,8 @@ def collapse(
         d._axes = [a for i, a in enumerate(d._axes) if i not in axis]
 
     if d.size != original_size:
-        # Remove the out-dated HDF5 chunking strategy
-        d.nc_clear_hdf5_chunksizes()
+        # Remove the out-dated dataset chunking strategy
+        d.nc_clear_dataset_chunksizes()
 
     return d, weights

cf/field.py

Lines changed: 8 additions & 7 deletions
@@ -13991,19 +13991,20 @@ def field(
         )  # pragma: no cover
 
     def HDF_chunks(self, *chunksizes):
-        """Deprecated at version 3.0.0.
+        """Get or set HDF chunk sizes.
 
-        Use methods 'Data.nc_hdf5_chunksizes',
-        'Data.nc_set_hdf5_chunksizes', 'Data.nc_clear_hdf5_chunksizes'
-        instead.
+        Deprecated at version 3.0.0 and is no longer available. Use
+        methods `Data.nc_dataset_chunksizes`,
+        `Data.nc_set_dataset_chunksizes`,
+        `Data.nc_clear_dataset_chunksizes` instead.
 
         """
         _DEPRECATION_ERROR_METHOD(
             self,
             "HDF_chunks",
-            "Use methods 'Data.nc_hdf5_chunksizes', "
-            "'Data.nc_set_hdf5_chunksizes', "
-            "'Data.nc_clear_hdf5_chunksizes' instead.",
+            "Use methods 'Data.nc_dataset_chunksizes', "
+            "'Data.nc_set_dataset_chunksizes', "
+            "'Data.nc_clear_dataset_chunksizes' instead.",
             version="3.0.0",
             removed_at="4.0.0",
         )  # pragma: no cover
