Skip to content

Commit 472b50e

Browse files
committed
Add coarsen.construct
1 parent f47af05 commit 472b50e

File tree

5 files changed

+131
-7
lines changed

5 files changed

+131
-7
lines changed

doc/howdoi.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ How do I ...
2424
* - change the order of dimensions
2525
- :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose`
2626
* - reshape dimensions
27-
- :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`
27+
- :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`, :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct`
2828
* - remove a variable from my object
2929
- :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars`
3030
* - remove dimensions of length 1 or 0

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ v0.18.3 (unreleased)
2121

2222
New Features
2323
~~~~~~~~~~~~
24+
- Added :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` (:issue:`5454`, :pull:`5475`).
25+
By `Deepak Cherian <https://github.com/dcherian>`_.
2426
- Allow assigning values to a subset of a dataset using positional or label-based
2527
indexing (:issue:`3015`, :pull:`5362`).
2628
By `Matthias Göbel <https://github.com/matzegoebel>`_.

xarray/core/rolling.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import functools
2+
import itertools
23
import warnings
34
from typing import Any, Callable, Dict
45

@@ -845,6 +846,80 @@ def __repr__(self):
845846
klass=self.__class__.__name__, attrs=",".join(attrs)
846847
)
847848

849+
def construct(
    self,
    window_dim=None,
    keep_attrs=None,
    **window_dim_kwargs,
):
    """
    Convert this Coarsen object to a DataArray or Dataset,
    where each coarsened dimension is reshaped into new dimensions.

    Parameters
    ----------
    window_dim : mapping, optional
        A mapping from the name of each coarsened dimension to a sequence
        of new dimension names, e.g. ``{"time": ("year", "month")}``.
        It must have exactly the same keys as the coarsening windows.
    keep_attrs : bool, optional
        If True, attributes are copied onto the reshaped variables and the
        result. If None, the value is obtained from
        ``_get_keep_attrs(default=True)``.
    **window_dim_kwargs : {dim: new_names, ...}, optional
        The keyword arguments form of ``window_dim``. Only consulted when
        ``window_dim`` is None.

    Returns
    -------
    Dataset or DataArray
        Object of the same kind as the one being coarsened, with the
        coarsened dimensions replaced by the new dimensions.

    Raises
    ------
    ValueError
        If neither ``window_dim`` nor ``window_dim_kwargs`` is given, if a
        coarsened dimension has no entry in ``window_dim``, or if
        ``window_dim`` names a dimension that is not being coarsened.
    """
    if window_dim is None:
        if not window_dim_kwargs:
            raise ValueError(
                "Either window_dim or window_dim_kwargs need to be specified."
            )
        # Take the keyword form as-is; it is validated against
        # ``self.windows`` just below, like the positional form.
        window_dim = dict(window_dim_kwargs)

    # Every coarsened dimension needs new names ...
    missing_dims = set(self.windows) - set(window_dim)
    if missing_dims:
        raise ValueError(
            f"'window_dim' must contain entries for all dimensions to coarsen. Missing {missing_dims}"
        )
    # ... and no names may be given for dimensions that are not coarsened.
    extra_dims = set(window_dim) - set(self.windows)
    if extra_dims:
        raise ValueError(
            f"'window_dim' includes dimensions that will not be coarsened: {extra_dims}"
        )

    # Function-level imports, as in the original (presumably to avoid a
    # circular import at module load -- confirm against the package layout).
    from .dataarray import DataArray
    from .dataset import Dataset

    if keep_attrs is None:
        keep_attrs = _get_keep_attrs(default=True)

    # Work on a Dataset in both cases; a DataArray is converted back at the end.
    if isinstance(self.obj, DataArray):
        obj = self.obj._to_temp_dataset()
    else:
        obj = self.obj

    reshaped = Dataset()
    if keep_attrs:
        reshaped.attrs = obj.attrs

    for key, var in obj.variables.items():
        # Replace each coarsened dimension by its new names; keep the others.
        reshaped_dims = tuple(
            itertools.chain.from_iterable(
                window_dim.get(dim, [dim]) for dim in var.dims
            )
        )
        if reshaped_dims != var.dims:
            windows = {w: self.windows[w] for w in window_dim if w in var.dims}
            reshaped_var, _ = var.coarsen_reshape(windows, self.boundary, self.side)
            attrs = var.attrs if keep_attrs else {}
            reshaped[key] = (reshaped_dims, reshaped_var, attrs)
        else:
            # Variable does not use any coarsened dimension: carried over unchanged.
            reshaped[key] = var

    # Everything that was a coordinate on the input stays a coordinate,
    # including coordinates that are not named after a coarsened dimension.
    result = reshaped.set_coords(set(self.obj.coords))
    if isinstance(self.obj, DataArray):
        return self.obj._from_temp_dataset(result)
    return result
922+
848923

849924
class DataArrayCoarsen(Coarsen):
850925
__slots__ = ()

xarray/core/variable.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2158,7 +2158,7 @@ def coarsen(
21582158
if not windows:
21592159
return self._replace(attrs=_attrs)
21602160

2161-
reshaped, axes = self._coarsen_reshape(windows, boundary, side)
2161+
reshaped, axes = self.coarsen_reshape(windows, boundary, side)
21622162
if isinstance(func, str):
21632163
name = func
21642164
func = getattr(duck_array_ops, name, None)
@@ -2167,7 +2167,7 @@ def coarsen(
21672167

21682168
return self._replace(data=func(reshaped, axis=axes, **kwargs), attrs=_attrs)
21692169

2170-
def _coarsen_reshape(self, windows, boundary, side):
2170+
def coarsen_reshape(self, windows, boundary, side):
21712171
"""
21722172
Construct a reshaped-array for coarsen
21732173
"""
@@ -2183,7 +2183,9 @@ def _coarsen_reshape(self, windows, boundary, side):
21832183

21842184
for d, window in windows.items():
21852185
if window <= 0:
2186-
raise ValueError(f"window must be > 0. Given {window}")
2186+
raise ValueError(
2187+
f"window must be > 0. Given {window} for dimension {d}"
2188+
)
21872189

21882190
variable = self
21892191
for d, window in windows.items():
@@ -2193,8 +2195,8 @@ def _coarsen_reshape(self, windows, boundary, side):
21932195
if boundary[d] == "exact":
21942196
if n * window != size:
21952197
raise ValueError(
2196-
"Could not coarsen a dimension of size {} with "
2197-
"window {}".format(size, window)
2198+
f"Could not coarsen a dimension of size {size} with "
2199+
f"window {window} and boundary='exact'. Try a different 'boundary' option."
21982200
)
21992201
elif boundary[d] == "trim":
22002202
if side[d] == "left":

xarray/tests/test_coarsen.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,13 @@
55
import xarray as xr
66
from xarray import DataArray, Dataset, set_options
77

8-
from . import assert_allclose, assert_equal, has_dask, requires_cftime
8+
from . import (
9+
assert_allclose,
10+
assert_equal,
11+
has_dask,
12+
raise_if_dask_computes,
13+
requires_cftime,
14+
)
915
from .test_dataarray import da
1016
from .test_dataset import ds
1117

@@ -299,3 +305,42 @@ def test_coarsen_da_reduce(da, window, name):
299305
actual = coarsen_obj.reduce(getattr(np, f"nan{name}"))
300306
expected = getattr(coarsen_obj, name)()
301307
assert_allclose(actual, expected)
308+
309+
310+
@pytest.mark.parametrize("dask", [True, False])
def test_coarsen_construct(dask):
    """Check ``coarsen(...).construct`` on a Dataset and on a DataArray.

    Both calls run inside ``raise_if_dask_computes`` and are compared with
    arrays reshaped by hand.
    """
    n_time, n_x, n_y = 48, 10, 12
    source = Dataset(
        {
            "vart": ("time", np.arange(n_time)),
            "varx": ("x", np.arange(n_x)),
            "vartx": (("x", "time"), np.arange(n_x * n_time).reshape(n_x, n_time)),
            "vary": ("y", np.arange(n_y)),
        },
        coords={"time": np.arange(n_time), "y": np.arange(n_y)},
    )

    # The chunked variant only runs when dask is installed.
    if dask and has_dask:
        source = source.chunk({"x": 4, "time": 10})

    new_dims = {"time": ("year", "month"), "x": ("x", "x_reshaped")}

    with raise_if_dask_computes():
        ds_result = source.coarsen(time=12, x=5).construct(new_dims)

    # Build the expected Dataset by reshaping the underlying arrays directly.
    expected = xr.Dataset()
    expected["vart"] = (("year", "month"), source.vart.data.reshape((-1, 12)))
    expected["varx"] = (("x", "x_reshaped"), source.varx.data.reshape((-1, 5)))
    expected["vartx"] = (
        ("x", "x_reshaped", "year", "month"),
        source.vartx.data.reshape(2, 5, 4, 12),
    )
    # "vary" has no coarsened dimension, so it must come through unchanged.
    expected["vary"] = source.vary
    expected.coords["time"] = (("year", "month"), source.time.data.reshape((-1, 12)))

    assert_equal(ds_result, expected)

    with raise_if_dask_computes():
        da_result = source.vartx.coarsen(time=12, x=5).construct(new_dims)
    assert_equal(da_result, expected["vartx"])

0 commit comments

Comments
 (0)