Skip to content

Commit a044c23

Browse files
authored
fix is_time to avoid memory overload (#397)
* fix is_time to avoid memory overload * using latest xarray * xfail regrid tests ... not working with latest xarray * added "test-x" target: run tests in parallel * unblock connection * update HISTORY.rst * disable PDF generation * Bump version: 0.15.0 → 0.16.0 * removed xfail_if_xarray_incompatible from conftest.py * mark flaky test
1 parent 7e0ee57 commit a044c23

File tree

14 files changed

+115
-66
lines changed

14 files changed

+115
-66
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ jobs:
122122
disable-sudo: true
123123
egress-policy: block
124124
allowed-endpoints: >
125+
cdn.fwupd.org:443
125126
conda.anaconda.org:443
126127
coveralls.io:443
127128
files.pythonhosted.org:443

.readthedocs.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@ sphinx:
1010
configuration: docs/conf.py
1111
fail_on_warning: true
1212

13-
# Optionally build your docs in additional formats such as PDF and ePub
14-
formats:
15-
- pdf
16-
13+
# Build configuration
1714
build:
1815
os: ubuntu-24.04
1916
tools:
@@ -24,6 +21,7 @@ build:
2421
pre_build:
2522
- env SPHINX_APIDOC_OPTIONS="members,undoc-members,show-inheritance,no-index" sphinx-apidoc -o docs/apidoc/ --private --module-first clisops
2623

24+
# Conda configuration
2725
conda:
2826
environment: environment.yml
2927

HISTORY.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
Version History
22
===============
33

4-
v0.16.0 (unreleased)
4+
v0.16.0 (2025-03-18)
55
--------------------
66

77
New Features
88
^^^^^^^^^^^^
99
* `clisops` now officially supports Python 3.13 (#398).
1010
* Version limitations have been lifted on `xarray` and `numpy` to support newer versions (#398).
11+
* Marked regrid tests that fail with the latest xarray version as `xfail` (#397).
12+
13+
Bug Fixes
14+
^^^^^^^^^
15+
* `clisops.utils.dataset_utils`
16+
* Fixed `is_time` to avoid memory overload (#397).
1117

1218
Internal Changes
1319
^^^^^^^^^^^^^^^^

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ lint: ## check style with flake8
6464
test: ## run tests quickly with the default Python
6565
python -m pytest -m "not slow"
6666

67+
test-x: ## run tests quickly with the default Python and in parallel
68+
python -m pytest -m "not slow" -n auto
69+
6770
test-all: ## run tests on every Python version with tox
6871
tox
6972

clisops/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
__contact__ = "[email protected]"
77
__copyright__ = "Copyright 2018-2024. United Kingdom Research and Innovation"
88
__license__ = "BSD-3-Clause"
9-
__version__ = "0.15.0"
9+
__version__ = "0.16.0"

clisops/utils/dataset_utils.py

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import cf_xarray as cfxr # noqa
77
import cftime
8+
import dask.array as da
89
import fsspec
910
import numpy as np
1011
import xarray as xr
@@ -55,12 +56,22 @@ def get_coord_by_type(
5556
if coord_type not in known_coord_types:
5657
raise ValueError(f"Coordinate type not known: {coord_type}")
5758

59+
# Get main variable ... if possible
60+
try:
61+
main_var = get_main_variable(ds)
62+
except ValueError:
63+
warnings.warn(f"No main variable found for dataset '{ds}'.")
64+
main_var = None
65+
5866
# Loop through all (potential) coordinates to find all possible matches
5967
if isinstance(ds, xr.DataArray):
6068
coord_vars = list(ds.coords)
6169
elif isinstance(ds, xr.Dataset):
6270
# Not all coordinate variables are always classified as such
6371
coord_vars = list(ds.coords) + list(ds.data_vars)
72+
# make sure we skip the main variable!
73+
if main_var is not None:
74+
coord_vars.remove(main_var)
6475
else:
6576
raise TypeError("Not an xarray.Dataset or xarray.DataArray.")
6677
for coord_id in coord_vars:
@@ -90,16 +101,18 @@ def get_coord_by_type(
90101
# Sort in terms of number of dimensions
91102
coords = sorted(coords, key=lambda x: len(ds[x].dims), reverse=True)
92103

93-
# Get dimensions and singleton coords of main variable
94-
main_var_dims = list(ds[get_main_variable(ds)].dims)
95-
96-
# Select coordinate with most dims (matching with main variable dims)
97-
for coord_id in coords:
98-
if all([dim in main_var_dims for dim in ds.coords[coord_id].dims]):
99-
if return_further_matches:
100-
return coord_id, [x for x in coords if x != coord_id]
101-
else:
102-
return coord_id
104+
if main_var is not None:
105+
# Get dimensions and singleton coords of main variable
106+
main_var_dims = list(ds[main_var].dims)
107+
108+
# Select coordinate with most dims (matching with main variable dims)
109+
for coord_id in coords:
110+
if coord_id in ds.coords:
111+
if all([dim in main_var_dims for dim in ds.coords[coord_id].dims]):
112+
if return_further_matches:
113+
return coord_id, [x for x in coords if x != coord_id]
114+
else:
115+
return coord_id
103116
# If the decision making fails, pass the first match
104117
if return_further_matches:
105118
return coords[0], coords[1:]
@@ -207,13 +220,39 @@ def is_level(coord):
207220
return False
208221

209222

223+
def _is_time(coord):
    """
    Check if the values of a coordinate are cftime datetime objects.

    Only the first element is inspected: it is selected along every
    dimension before the values are requested, so a lazily loaded or
    Dask-backed coordinate is not materialised as a whole.

    :param coord: coordinate of xarray dataset e.g. coord = ds.coords[coord_id]
    :return: (bool) True if the first value of the coordinate is a cftime.datetime.
    """
    if coord.size == 0:
        return False  # Empty array, nothing to inspect

    # cftime objects can only be stored in object arrays, so every other
    # dtype is rejected from the metadata alone, without reading any data.
    if coord.dtype.kind != "O":
        return False

    # Index first, then ask for the values: `.values` always yields a NumPy
    # array, so no separate Dask `compute()` step is required afterwards.
    first_value = coord.isel({dim: 0 for dim in coord.dims}).values

    return isinstance(first_value.item(0), cftime.datetime)
243+
244+
210245
def is_time(coord):
211246
"""
212247
Determines if a coordinate is time.
213248
214249
:param coord: coordinate of xarray dataset e.g. coord = ds.coords[coord_id]
215250
:return: (bool) True if the coordinate is time.
216251
"""
252+
if False and coord.ndim >= 2:
253+
# skip variables with two or more dimensions: lat_bnds, lon_bnds, time_bnds, t, ...
254+
return False
255+
217256
if "time" in coord.cf.coordinates and coord.name in coord.cf.coordinates["time"]:
218257
return True
219258

@@ -226,14 +265,11 @@ def is_time(coord):
226265
if np.issubdtype(coord.dtype, np.datetime64):
227266
return True
228267

229-
if isinstance(np.atleast_1d(coord.values)[0], cftime.datetime):
230-
return True
231-
232268
if hasattr(coord, "axis"):
233269
if coord.axis == "T":
234270
return True
235271

236-
return False
272+
return _is_time(coord)
237273

238274

239275
def is_realization(coord):
@@ -287,8 +323,12 @@ def get_main_variable(ds, exclude_common_coords=True):
287323
Default is True.
288324
:return: (str) The main variable of the dataset e.g. 'tas'
289325
"""
290-
291-
data_dims = [data.dims for var_id, data in ds.variables.items()]
326+
if isinstance(ds, xr.Dataset):
327+
variables = list(ds.variables.items())
328+
data_dims = [data.dims for var_id, data in variables]
329+
else:
330+
variables = []
331+
data_dims = []
292332
flat_dims = [dim for sublist in data_dims for dim in sublist]
293333

294334
results = {}
@@ -303,7 +343,7 @@ def get_main_variable(ds, exclude_common_coords=True):
303343
"realization",
304344
]
305345

306-
for var_id, data in ds.variables.items():
346+
for var_id, data in variables:
307347
if var_id in flat_dims:
308348
continue
309349
if exclude_common_coords is True and any(

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@
8989
# the built documents.
9090
#
9191
# The short X.Y version.
92-
version = "0.15.0"
92+
version = "0.16.0"
9393
# The full version, including alpha/beta/rc tags.
9494
release = version
9595

environment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ dependencies:
2828
- roocs-grids >=0.1.2
2929
- scipy >=1.10.0
3030
- shapely >=2.0
31-
- xarray >=2022.6.0 # Some issues with CFTime calendars. See: https://github.com/pydata/xarray/issues/7794
31+
- xarray >=2025.1.1 # use decode_times=CFDatetimeCoder(use_cftime=True)
3232
- zarr >=2.13.0
33-
# Compression
33+
# Compression
3434
- zstandard
35-
# Extras
35+
# Extras
3636
- netcdf4 >=1.4
3737
- xesmf >=0.8.2
3838
# # Dev tools and testing

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ dependencies = [
6767
"roocs_grids >=0.1.2",
6868
"scipy>=1.9.0",
6969
"shapely >=1.9",
70-
"xarray >=2022.9.0", # Some issues with CFTime calendars. See: https://github.com/pydata/xarray/issues/7794
70+
"xarray >=2025.1.1", # Use decode_times=CFDatetimeCoder(use_cftime=True)
7171
"zarr >=2.13.0"
7272
]
7373

@@ -130,7 +130,7 @@ target-version = [
130130
]
131131

132132
[tool.bumpversion]
133-
current_version = "0.15.0"
133+
current_version = "0.16.0"
134134
commit = true
135135
tag = false
136136
allow_dirty = false

tests/conftest.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -409,14 +409,3 @@ def clisops_test_data():
409409
"small_geojson": test_data.joinpath("small_geojson.json").as_posix(),
410410
"multi_regions_geojson": test_data.joinpath("multi_regions.json").as_posix(),
411411
}
412-
413-
414-
# Temporarily required until https://github.com/pydata/xarray/issues/7794 is addressed
415-
@pytest.fixture(scope="session")
416-
def xfail_if_xarray_incompatible():
417-
if Version(xr.__version__) >= Version(XARRAY_INCOMPATIBLE_VERSION):
418-
pytest.xfail(
419-
f"xarray version >= {XARRAY_INCOMPATIBLE_VERSION} "
420-
f"is not supported for several operations with cf-time indexed arrays. "
421-
"For more information, see: https://github.com/pydata/xarray/issues/7794."
422-
)

0 commit comments

Comments
 (0)