Skip to content

Commit fd5268f

Browse files
committed
Merge pull request #137 from jhamman/dataset_reductions
Dataset.reduce methods
2 parents 7732816 + b5d82a0 commit fd5268f

File tree

4 files changed

+159
-0
lines changed

4 files changed

+159
-0
lines changed

doc/api.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,23 @@ Selecting
7070
Dataset.squeeze
7171
Dataset.groupby
7272

73+
Computations
74+
~~~~~~~~~~~~
75+
76+
.. autosummary::
77+
:toctree: generated/
78+
79+
Dataset.all
80+
Dataset.any
81+
Dataset.argmax
82+
Dataset.argmin
83+
Dataset.max
84+
Dataset.min
85+
Dataset.mean
86+
Dataset.std
87+
Dataset.sum
88+
Dataset.var
89+
7390
IO / Conversion
7491
~~~~~~~~~~~~~~~
7592

doc/tutorial.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,23 @@ contents of the ``Dataset`` will still be the same underlying
159159
:py:class:`xray.Variable`. You can copy all data by supplying the argument
160160
``deep=True``.
161161

162+
Dataset reductions
163+
~~~~~~~~~~~~~~~~~~
164+
We can apply numpy reduction functions to the entire dataset, returning a new
165+
``Dataset``.
166+
167+
.. ipython:: python
168+
169+
bar = ds.mean()
170+
bar
171+
172+
The ``dimension`` keyword (default ``None``) limits the reduction to only the dimension(s) provided.
173+
174+
.. ipython:: python
175+
176+
spam = ds.mean(dimension='time')
177+
spam
178+
162179
``DataArray`` objects
163180
---------------------
164181

test/test_dataset.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,3 +656,45 @@ def test_lazy_load(self):
656656
# these should not raise UnexpectedDataAccess:
657657
ds.indexed(time=10)
658658
ds.indexed(time=slice(10), dim1=[0]).indexed(dim1=0, dim2=-1)
659+
660+
def test_reduce(self):
    """Check Dataset reductions over all, some and no dimensions."""
    data = create_test_data()

    # Reducing over everything must leave no coordinates behind.
    self.assertEqual(len(data.mean().coordinates), 0)

    # The Dataset-wide reduction has to agree with reducing each variable
    # on its own. Keep the Dataset result in ``actual`` so the loop does
    # not clobber it with the per-variable ``expected`` value.
    actual = data.max()
    for var in data.noncoordinates:
        expected = data[var].max()
        self.assertDataArrayEqual(expected, actual[var])

    # A single dimension name and a one-element list are equivalent.
    self.assertDatasetEqual(data.min(dimension=['dim1']),
                            data.min(dimension='dim1'))

    # Only the dimensions that were not reduced over should remain.
    for reduct, expected in [('dim2', ['dim1', 'dim3', 'time']),
                             (['dim2', 'time'], ['dim1', 'dim3']),
                             (('dim2', 'time'), ['dim1', 'dim3']),
                             ((), ['dim1', 'dim2', 'dim3', 'time'])]:
        actual = data.min(dimension=reduct).dimensions
        self.assertItemsEqual(actual, expected)

    # Reducing over an empty list of dimensions returns an equal Dataset.
    self.assertDatasetEqual(data.mean(dimension=[]), data)
684+
def test_reduce_bad_dimension(self):
    """Reducing over a dimension the Dataset lacks raises ValueError."""
    data = create_test_data()
    with self.assertRaisesRegexp(ValueError, 'Dataset does not contain'):
        # Only the exception matters here, so the result is not bound.
        data.mean(dimension='bad_dim')
688+
689+
def test_reduce_non_numeric(self):
    """A string-valued variable is left out of the result of ``mean``."""
    data1 = create_test_data(seed=44)
    data2 = create_test_data(seed=44)

    # Give data1 (and only data1) an extra variable holding strings.
    add_vars = {'var4': ['dim1', 'dim2']}
    for name, dims in sorted(add_vars.items()):
        shape = tuple(_dims[d] for d in dims)
        ints = np.random.random_integers(0, 100, size=shape)
        data1[name] = (dims, ints.astype(np.str_), {'foo': 'variable'})

    # The string variable is absent from the reduction, so both datasets
    # reduce to the same result.
    self.assertTrue('var4' not in data1.mean())
    self.assertDatasetEqual(data1.mean(), data2.mean())
    self.assertDatasetEqual(data1.mean(dimension='dim1'),
                            data2.mean(dimension='dim1'))

xray/dataset.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from . import variable
1616
from . import utils
1717
from . import data_array
18+
from . import ops
1819
from .utils import (FrozenOrderedDict, Frozen, SortedKeysDict, ChainMap,
1920
multi_index_from_product)
2021
from .pycompat import iteritems, basestring
@@ -973,6 +974,86 @@ def squeeze(self, dimension=None):
973974
"""
974975
return utils.squeeze(self, self.dimensions, dimension)
975976

977+
_reduce_method_docstring = \
978+
"""Reduce this {cls}'s data' by applying `{name}` along some
979+
dimension(s).
980+
981+
Parameters
982+
----------
983+
dimension : str or sequence of str, optional
984+
Dimension(s) over which to apply `func`. By default `func` is
985+
applied over all dimensions.
986+
**kwargs : dict
987+
Additional keyword arguments passed on to `{name}`.
988+
989+
Returns
990+
-------
991+
reduced : {cls}
992+
New {cls} object with `{name}` applied to its data and the
993+
indicated dimension(s) removed.
994+
"""
995+
996+
@classmethod
997+
def _reduce_method(cls, f, name=None, module=None):
998+
def func(self, dimension=None, **kwargs):
999+
return self.reduce(f, dimension, **kwargs)
1000+
if name is None:
1001+
name = f.__name__
1002+
func.__name__ = name
1003+
func.__doc__ = cls._reduce_method_docstring.format(
1004+
name=('' if module is None else module + '.') + name,
1005+
cls=cls.__name__)
1006+
return func
1007+
1008+
def reduce(self, func, dimension=None, **kwargs):
    """Apply `func` along some dimension(s) of every variable.

    Parameters
    ----------
    func : function
        Function which can be called in the form
        `f(x, axis=axis, **kwargs)` to return the result of reducing an
        np.ndarray over an integer valued axis.
    dimension : str or sequence of str, optional
        Dimension(s) over which to apply `func`. When omitted, every
        entry of ``self.coordinates`` is reduced over.
    **kwargs : dict
        Additional keyword arguments passed on to `func`.

    Returns
    -------
    reduced : Dataset
        New Dataset built from the reduced variables, together with the
        variables that do not use any of the requested dimensions
        (carried over unchanged).

    Raises
    ------
    ValueError
        If a requested dimension is not in ``self.coordinates``.
    """
    # Normalise ``dimension`` into a set of names.
    if dimension is None:
        dims = set(self.coordinates)
    elif isinstance(dimension, basestring):
        dims = set([dimension])
    else:
        dims = set(dimension)

    bad_dims = [dim for dim in dims if dim not in self.coordinates]
    if bad_dims:
        raise ValueError('Dataset does not contain the dimensions: '
                         '{0}'.format(bad_dims))

    variables = OrderedDict()
    for name, var in iteritems(self.variables):
        reduce_dims = [dim for dim in var.dimensions if dim in dims]
        if not reduce_dims:
            # Untouched by this reduction: keep the variable as it is.
            variables[name] = var
            continue
        if name in self.dimensions:
            # A variable named after a dimension is dropped, not reduced.
            continue
        try:
            variables[name] = var.reduce(func, dimension=reduce_dims,
                                         **kwargs)
        except TypeError:
            # Deliberately best-effort: a variable whose reduction raises
            # TypeError is left out of the result.
            pass
    return Dataset(variables=variables)
1056+
9761057
@classmethod
9771058
def concat(cls, datasets, dimension='concat_dimension', indexers=None,
9781059
mode='different', concat_over=None, compat='equals'):
@@ -1166,3 +1247,5 @@ def from_dataframe(cls, dataframe):
11661247
data = series.values.reshape(shape)
11671248
obj[name] = (dimensions, data)
11681249
return obj
1250+
1251+
# Add the reduction methods to Dataset. Which methods are added is decided
# inside ops.inject_reduce_methods (presumably built with
# Dataset._reduce_method -- TODO confirm in ops).
ops.inject_reduce_methods(Dataset)

0 commit comments

Comments
 (0)