Skip to content

Commit 13be9c2

Browse files
authored
Merge branch 'master' into issue4208
2 parents aacef50 + a36d0a1 commit 13be9c2

28 files changed

+509
-175
lines changed

doc/conf.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21
#
32
# xarray documentation build configuration file, created by
43
# sphinx-quickstart on Thu Feb 6 18:57:54 2014.
@@ -35,7 +34,7 @@
3534
print("pip environment:")
3635
subprocess.run(["pip", "list"])
3736

38-
print("xarray: %s, %s" % (xarray.__version__, xarray.__file__))
37+
print(f"xarray: {xarray.__version__}, {xarray.__file__}")
3938

4039
with suppress(ImportError):
4140
import matplotlib

doc/whats-new.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ New Features
3535
- ``min_count`` can be supplied to reductions such as ``.sum`` when specifying
3636
multiple dimensions to reduce over. (:pull:`4356`)
3737
By `Maximilian Roos <https://github.com/max-sixty>`_.
38+
- :py:func:`xarray.cov` and :py:func:`xarray.corr` now handle missing values. (:pull:`4351`)
39+
By `Maximilian Roos <https://github.com/max-sixty>`_.
3840
- Build ``CFTimeIndex.__repr__`` explicitly as :py:class:`pandas.Index`. Add ``calendar`` as a new
3941
property for :py:class:`CFTimeIndex` and show ``calendar`` and ``length`` in
4042
``CFTimeIndex.__repr__`` (:issue:`2416`, :pull:`4092`)
@@ -73,6 +75,8 @@ Bug fixes
7375
and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser <https://github.com/mathause>`_.
7476
- Fixed overflow issue causing incorrect results in computing means of :py:class:`cftime.datetime`
7577
arrays (:issue:`4341`). By `Spencer Clark <https://github.com/spencerkclark>`_.
78+
- Fix :py:func:`xarray.apply_ufunc` with ``vectorize=True`` and ``exclude_dims`` (:issue:`3890`).
79+
By `Mathias Hauser <https://github.com/mathause>`_.
7680

7781
Documentation
7882
~~~~~~~~~~~~~
@@ -95,7 +99,8 @@ Internal Changes
9599
(:issue:`4294`) By `Guido Imperiale <https://github.com/crusaderky>`_
96100
- Enable type checking for :py:func:`concat` (:issue:`4238`)
97101
By `Mathias Hauser <https://github.com/mathause>`_.
98-
102+
- Updated plot functions for matplotlib version 3.3 and silenced warnings in the
103+
plot tests (:pull:`4365`). By `Mathias Hauser <https://github.com/mathause>`_.
99104

100105
.. _whats-new.0.16.0:
101106

setup.cfg

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,11 @@ markers =
103103

104104
[flake8]
105105
ignore =
106-
# whitespace before ':' - doesn't work well with black
107-
E203
108-
E402
109-
# line too long - let black worry about that
110-
E501
111-
# do not assign a lambda expression, use a def
112-
E731
113-
# line break before binary operator
114-
W503
106+
E203 # whitespace before ':' - doesn't work well with black
107+
E402 # module level import not at top of file
108+
E501 # line too long - let black worry about that
109+
E731 # do not assign a lambda expression, use a def
110+
W503 # line break before binary operator
115111
exclude=
116112
.eggs
117113
doc

setup.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,4 @@
11
#!/usr/bin/env python
22
from setuptools import setup
33

4-
try:
5-
setup(use_scm_version=True)
6-
except LookupError as e:
7-
# .git has been removed, and this is not a package created by sdist
8-
# This is the case e.g. of a remote deployment with PyCharm Professional
9-
if not str(e).startswith("setuptools-scm was unable to detect version"):
10-
raise
11-
setup(version="999")
4+
setup(use_scm_version={"fallback_version": "999"})

xarray/core/alignment.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,10 @@ def align(
103103
used in preference to the aligned indexes.
104104
exclude : sequence of str, optional
105105
Dimensions that must be excluded from alignment
106-
fill_value : scalar, optional
107-
Value to use for newly missing values
106+
fill_value : scalar or dict-like, optional
107+
Value to use for newly missing values. If a dict-like, maps
108+
variable names to fill values. Use a data array's name to
109+
refer to its values.
108110
109111
Returns
110112
-------
@@ -581,16 +583,21 @@ def reindex_variables(
581583

582584
for name, var in variables.items():
583585
if name not in indexers:
586+
if isinstance(fill_value, dict):
587+
fill_value_ = fill_value.get(name, dtypes.NA)
588+
else:
589+
fill_value_ = fill_value
590+
584591
if sparse:
585-
var = var._as_sparse(fill_value=fill_value)
592+
var = var._as_sparse(fill_value=fill_value_)
586593
key = tuple(
587594
slice(None) if d in unchanged_dims else int_indexers.get(d, slice(None))
588595
for d in var.dims
589596
)
590597
needs_masking = any(d in masked_dims for d in var.dims)
591598

592599
if needs_masking:
593-
new_var = var._getitem_with_mask(key, fill_value=fill_value)
600+
new_var = var._getitem_with_mask(key, fill_value=fill_value_)
594601
elif all(is_full_slice(k) for k in key):
595602
# no reindexing necessary
596603
# here we need to manually deal with copying data, since

xarray/core/combine.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,10 @@ def combine_nested(
393393
Details are in the documentation of concat
394394
coords : {"minimal", "different", "all" or list of str}, optional
395395
Details are in the documentation of concat
396-
fill_value : scalar, optional
397-
Value to use for newly missing values
396+
fill_value : scalar or dict-like, optional
397+
Value to use for newly missing values. If a dict-like, maps
398+
variable names to fill values. Use a data array's name to
399+
refer to its values.
398400
join : {"outer", "inner", "left", "right", "exact"}, optional
399401
String indicating how to combine differing indexes
400402
(excluding concat_dim) in objects
@@ -569,10 +571,12 @@ def combine_by_coords(
569571
addition to the "minimal" data variables.
570572
571573
If objects are DataArrays, `data_vars` must be "all".
572-
coords : {"minimal", "different", "all" or list of str}, optional
574+
coords : {"minimal", "different", "all"} or list of str, optional
573575
As per the "data_vars" kwarg, but for coordinate variables.
574-
fill_value : scalar, optional
575-
Value to use for newly missing values. If None, raises a ValueError if
576+
fill_value : scalar or dict-like, optional
577+
Value to use for newly missing values. If a dict-like, maps
578+
variable names to fill values. Use a data array's name to
579+
refer to its values. If None, raises a ValueError if
576580
the passed Datasets do not create a complete hypercube.
577581
join : {"outer", "inner", "left", "right", "exact"}, optional
578582
String indicating how to combine differing indexes

xarray/core/common.py

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,10 +1364,13 @@ def full_like(other, fill_value, dtype: DTypeLike = None):
13641364
----------
13651365
other : DataArray, Dataset or Variable
13661366
The reference object in input
1367-
fill_value : scalar
1368-
Value to fill the new object with before returning it.
1369-
dtype : dtype, optional
1370-
dtype of the new array. If omitted, it defaults to other.dtype.
1367+
fill_value : scalar or dict-like
1368+
Value to fill the new object with before returning it. If
1369+
other is a Dataset, may also be a dict-like mapping data
1370+
variables to fill values.
1371+
dtype : dtype or dict-like of dtype, optional
1372+
dtype of the new array. If a dict-like, maps variable names to
1373+
dtypes. If omitted, it defaults to other.dtype.
13711374
13721375
Returns
13731376
-------
@@ -1427,6 +1430,34 @@ def full_like(other, fill_value, dtype: DTypeLike = None):
14271430
* lat (lat) int64 1 2
14281431
* lon (lon) int64 0 1 2
14291432
1433+
>>> ds = xr.Dataset(
1434+
... {"a": ("x", [3, 5, 2]), "b": ("x", [9, 1, 0])}, coords={"x": [2, 4, 6]}
1435+
... )
1436+
>>> ds
1437+
<xarray.Dataset>
1438+
Dimensions: (x: 3)
1439+
Coordinates:
1440+
* x (x) int64 2 4 6
1441+
Data variables:
1442+
a (x) int64 3 5 2
1443+
b (x) int64 9 1 0
1444+
>>> xr.full_like(ds, fill_value={"a": 1, "b": 2})
1445+
<xarray.Dataset>
1446+
Dimensions: (x: 3)
1447+
Coordinates:
1448+
* x (x) int64 2 4 6
1449+
Data variables:
1450+
a (x) int64 1 1 1
1451+
b (x) int64 2 2 2
1452+
>>> xr.full_like(ds, fill_value={"a": 1, "b": 2}, dtype={"a": bool, "b": float})
1453+
<xarray.Dataset>
1454+
Dimensions: (x: 3)
1455+
Coordinates:
1456+
* x (x) int64 2 4 6
1457+
Data variables:
1458+
a (x) bool True True True
1459+
b (x) float64 2.0 2.0 2.0
1460+
14301461
See also
14311462
--------
14321463
@@ -1438,12 +1469,22 @@ def full_like(other, fill_value, dtype: DTypeLike = None):
14381469
from .dataset import Dataset
14391470
from .variable import Variable
14401471

1441-
if not is_scalar(fill_value):
1442-
raise ValueError(f"fill_value must be scalar. Received {fill_value} instead.")
1472+
if not is_scalar(fill_value) and not (
1473+
isinstance(other, Dataset) and isinstance(fill_value, dict)
1474+
):
1475+
raise ValueError(
1476+
f"fill_value must be scalar or, for datasets, a dict-like. Received {fill_value} instead."
1477+
)
14431478

14441479
if isinstance(other, Dataset):
1480+
if not isinstance(fill_value, dict):
1481+
fill_value = {k: fill_value for k in other.data_vars.keys()}
1482+
1483+
if not isinstance(dtype, dict):
1484+
dtype = {k: dtype for k in other.data_vars.keys()}
1485+
14451486
data_vars = {
1446-
k: _full_like_variable(v, fill_value, dtype)
1487+
k: _full_like_variable(v, fill_value.get(k, dtypes.NA), dtype.get(k, None))
14471488
for k, v in other.data_vars.items()
14481489
}
14491490
return Dataset(data_vars, coords=other.coords, attrs=other.attrs)
@@ -1466,6 +1507,9 @@ def _full_like_variable(other, fill_value, dtype: DTypeLike = None):
14661507
"""
14671508
from .variable import Variable
14681509

1510+
if fill_value is dtypes.NA:
1511+
fill_value = dtypes.get_fill_value(dtype if dtype is not None else other.dtype)
1512+
14691513
if is_duck_dask_array(other.data):
14701514
import dask.array
14711515

xarray/core/computation.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,19 +120,23 @@ def __ne__(self, other):
120120

121121
def __repr__(self):
122122
return "{}({!r}, {!r})".format(
123-
type(self).__name__, list(self.input_core_dims), list(self.output_core_dims)
123+
type(self).__name__,
124+
list(self.input_core_dims),
125+
list(self.output_core_dims),
124126
)
125127

126128
def __str__(self):
127129
lhs = ",".join("({})".format(",".join(dims)) for dims in self.input_core_dims)
128130
rhs = ",".join("({})".format(",".join(dims)) for dims in self.output_core_dims)
129131
return f"{lhs}->{rhs}"
130132

131-
def to_gufunc_string(self):
133+
def to_gufunc_string(self, exclude_dims=frozenset()):
132134
"""Create an equivalent signature string for a NumPy gufunc.
133135
134136
Unlike __str__, handles dimensions that don't map to Python
135137
identifiers.
138+
139+
Also creates unique names for input_core_dims contained in exclude_dims.
136140
"""
137141
input_core_dims = [
138142
[self.dims_map[dim] for dim in core_dims]
@@ -142,6 +146,25 @@ def to_gufunc_string(self):
142146
[self.dims_map[dim] for dim in core_dims]
143147
for core_dims in self.output_core_dims
144148
]
149+
150+
# enumerate input_core_dims contained in exclude_dims to make them unique
151+
if exclude_dims:
152+
153+
exclude_dims = [self.dims_map[dim] for dim in exclude_dims]
154+
155+
counter = Counter()
156+
157+
def _enumerate(dim):
158+
if dim in exclude_dims:
159+
n = counter[dim]
160+
counter.update([dim])
161+
dim = f"{dim}_{n}"
162+
return dim
163+
164+
input_core_dims = [
165+
[_enumerate(dim) for dim in arg] for arg in input_core_dims
166+
]
167+
145168
alt_signature = type(self)(input_core_dims, output_core_dims)
146169
return str(alt_signature)
147170

@@ -545,10 +568,12 @@ def broadcast_compat_data(
545568
return data
546569

547570

548-
def _vectorize(func, signature, output_dtypes):
571+
def _vectorize(func, signature, output_dtypes, exclude_dims):
549572
if signature.all_core_dims:
550573
func = np.vectorize(
551-
func, otypes=output_dtypes, signature=signature.to_gufunc_string()
574+
func,
575+
otypes=output_dtypes,
576+
signature=signature.to_gufunc_string(exclude_dims),
552577
)
553578
else:
554579
func = np.vectorize(func, otypes=output_dtypes)
@@ -623,7 +648,7 @@ def func(*arrays):
623648

624649
res = da.apply_gufunc(
625650
numpy_func,
626-
signature.to_gufunc_string(),
651+
signature.to_gufunc_string(exclude_dims),
627652
*arrays,
628653
vectorize=vectorize,
629654
output_dtypes=output_dtypes,
@@ -649,7 +674,9 @@ def func(*arrays):
649674
)
650675
else:
651676
if vectorize:
652-
func = _vectorize(func, signature, output_dtypes=output_dtypes)
677+
func = _vectorize(
678+
func, signature, output_dtypes=output_dtypes, exclude_dims=exclude_dims
679+
)
653680

654681
result_data = func(*input_data)
655682

@@ -1250,7 +1277,9 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None):
12501277
# N.B. `skipna=False` is required or there is a bug when computing
12511278
# auto-covariance. E.g. Try xr.cov(da,da) for
12521279
# da = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"])
1253-
cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim, skipna=False) / (valid_count)
1280+
cov = (demeaned_da_a * demeaned_da_b).sum(dim=dim, skipna=True, min_count=1) / (
1281+
valid_count
1282+
)
12541283

12551284
if method == "cov":
12561285
return cov

xarray/core/concat.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,10 @@ def concat(
125125
List of integer arrays which specifies the integer positions to which
126126
to assign each dataset along the concatenated dimension. If not
127127
supplied, objects are concatenated in the provided order.
128-
fill_value : scalar, optional
129-
Value to use for newly missing values
128+
fill_value : scalar or dict-like, optional
129+
Value to use for newly missing values. If a dict-like, maps
130+
variable names to fill values. Use a data array's name to
131+
refer to its values.
130132
join : {"outer", "inner", "left", "right", "exact"}, optional
131133
String indicating how to combine differing indexes
132134
(excluding dim) in objects

xarray/core/dask_array_compat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def _validate_pad_output_shape(input_shape, pad_width, output_shape):
114114
elif (
115115
len(pad_width) == len(input_shape)
116116
and all(map(lambda x: len(x) == 2, pad_width))
117-
and all((isint(i) for p in pad_width for i in p))
117+
and all(isint(i) for p in pad_width for i in p)
118118
):
119119
pad_width = np.sum(pad_width, axis=1)
120120
else:

0 commit comments

Comments
 (0)