-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
combine_by_coordinates to handle unnamed data arrays. #4696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
b35de8e
f966e76
68b7b49
540961f
1c9b4c2
cb5ed5e
77020c0
6af896b
f06371a
7cdeabb
6190839
3055000
cbc002f
11a868b
89ac962
5f3afa5
feb90ce
db5b906
e884f52
0044bb9
44548ee
5fe8323
55f53b9
805145c
3eed47a
05faa88
6c75525
2c43030
f6fae25
81ec1ff
caaee74
637d4cc
b5940a1
d02da23
04cd5f8
e58a9e2
c0fc4f1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,4 +1,5 @@ | ||||||
import itertools | ||||||
import warnings | ||||||
from collections import Counter | ||||||
|
||||||
import pandas as pd | ||||||
|
@@ -10,6 +11,7 @@ | |||||
from .merge import merge | ||||||
from .utils import iterate_nested | ||||||
|
||||||
|
||||||
def _infer_concat_order_from_positions(datasets): | ||||||
return dict(_infer_tile_ids_from_nested_list(datasets, ())) | ||||||
|
||||||
|
@@ -310,6 +312,7 @@ def _new_tile_id(single_id_ds_pair): | |||||
tile_id, ds = single_id_ds_pair | ||||||
return tile_id[1:] | ||||||
|
||||||
|
||||||
def _nested_combine( | ||||||
datasets, | ||||||
concat_dims, | ||||||
|
@@ -351,6 +354,7 @@ def _nested_combine( | |||||
) | ||||||
return combined | ||||||
|
||||||
|
||||||
def combine_nested( | ||||||
datasets, | ||||||
concat_dim, | ||||||
|
@@ -541,7 +545,8 @@ def combine_nested( | |||||
mixed_datasets_and_arrays = any( | ||||||
isinstance(obj, Dataset) for obj in iterate_nested(datasets) | ||||||
) and any( | ||||||
isinstance(obj, DataArray) and obj.name is None for obj in iterate_nested(datasets) | ||||||
isinstance(obj, DataArray) and obj.name is None | ||||||
for obj in iterate_nested(datasets) | ||||||
) | ||||||
if mixed_datasets_and_arrays: | ||||||
raise ValueError("Can't combine datasets with unnamed arrays.") | ||||||
|
@@ -626,15 +631,16 @@ def _combine_single_variable_hypercube( | |||||
return concatenated | ||||||
|
||||||
|
||||||
# TODO remove empty list default param after version 0.19, see PR4696 | ||||||
def combine_by_coords( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we also modify There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wrote a test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I know what @dcherian meant - at first glance it looks like the |
||||||
data_objects, | ||||||
data_objects=[], | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(It's considered bad practice to have mutable default arguments to functions in Python.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I put this in because if someone calls this function with `datasets` as a named parameter, the `data_objects` argument would be unspecified and their code would break with a missing-argument error. This is part of the deprecation warning below. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You make a good point, but that means the default argument should be |
||||||
compat="no_conflicts", | ||||||
data_vars="all", | ||||||
coords="different", | ||||||
fill_value=dtypes.NA, | ||||||
join="outer", | ||||||
combine_attrs="no_conflicts", | ||||||
datasets=None | ||||||
datasets=None, | ||||||
): | ||||||
""" | ||||||
Attempt to auto-magically combine the given datasets (or data arrays) | ||||||
|
@@ -840,7 +846,8 @@ def combine_by_coords( | |||||
if datasets is not None: | ||||||
warnings.warn( | ||||||
"The datasets argument has been renamed to `data_objects`." | ||||||
" In future passing a value for datasets will raise an error.") | ||||||
" In future passing a value for datasets will raise an error." | ||||||
) | ||||||
data_objects = datasets | ||||||
|
||||||
if not data_objects: | ||||||
|
@@ -849,9 +856,7 @@ def combine_by_coords( | |||||
mixed_arrays_and_datasets = any( | ||||||
isinstance(data_object, DataArray) and data_object.name is None | ||||||
for data_object in data_objects | ||||||
) and any( | ||||||
isinstance(data_object, Dataset) for data_object in data_objects | ||||||
) | ||||||
) and any(isinstance(data_object, Dataset) for data_object in data_objects) | ||||||
if mixed_arrays_and_datasets: | ||||||
raise ValueError("Can't automatically combine datasets with unnamed arrays.") | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -680,11 +680,11 @@ def test_nested_combine_mixed_datasets_arrays(self): | |||||
Dataset({"x": [2, 3]}), | ||||||
] | ||||||
with pytest.raises( | ||||||
ValueError, | ||||||
match=r"Can't combine datasets with unnamed arrays." | ||||||
): | ||||||
ValueError, match=r"Can't combine datasets with unnamed arrays." | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Tiny clarification that this means datasets combined with other xarray.DataArray objects, not something about the NumPy arrays inside the xarray.Dataset objects. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good clarification. |
||||||
): | ||||||
combine_nested(objs, "x") | ||||||
|
||||||
|
||||||
class TestCombineAuto: | ||||||
def test_combine_by_coords(self): | ||||||
objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] | ||||||
|
@@ -733,9 +733,9 @@ def test_combine_coords_mixed_datasets_arrays(self): | |||||
Dataset({"x": [2, 3]}), | ||||||
] | ||||||
with pytest.raises( | ||||||
ValueError, | ||||||
match=r"Can't automatically combine datasets with unnamed arrays." | ||||||
): | ||||||
ValueError, | ||||||
match=r"Can't automatically combine datasets with unnamed arrays.", | ||||||
): | ||||||
combine_by_coords(objs) | ||||||
|
||||||
@pytest.mark.parametrize( | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.