From 41f4fd89959ebeb2987cd0e892d3abb501087d8c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 25 Oct 2022 16:11:41 +0200 Subject: [PATCH 01/69] add indexes argument to Dataset.__init__ --- xarray/core/dataset.py | 52 ++++++++++++++++++++++++++++++++++++------ xarray/core/merge.py | 34 +++++++++++++++++++++++++-- 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ab1d36a9e54..241fb553902 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -64,6 +64,7 @@ dataset_update_method, merge_coordinates_without_align, merge_data_and_coords, + merge_indexes, ) from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs @@ -440,8 +441,10 @@ class Dataset( Dataset implements the mapping interface with keys given by variable names and values given by DataArray objects for each variable name. - One dimensional variables with name equal to their dimension are - index coordinates used for label based indexing. + By default, pandas indexes are created for one dimensional variables with + name equal to their dimension so those variables can be used as coordinates + for label based indexing. Xarray-compatible indexes may also be provided + via the `indexes` argument. To load data from a file or file-like object, use the `open_dataset` function. @@ -492,6 +495,11 @@ class Dataset( attrs : dict-like, optional Global attributes to save on this dataset. + indexes : py:class:`~xarray.Indexes` or list of py:class`~xarray.Indexes`, optional + One or more collections of Xarray-compatible indexes and their + coordinates variables. Provide an empty list or collection if you + want to skip the creation of default (pandas) indexes for dimension + coordinates. Examples -------- @@ -551,6 +559,7 @@ class Dataset( precipitation float64 8.326 Attributes: description: Weather related data. 
+ """ _attrs: dict[Hashable, Any] | None @@ -581,14 +590,28 @@ def __init__( data_vars: Mapping[Any, Any] | None = None, coords: Mapping[Any, Any] | None = None, attrs: Mapping[Any, Any] | None = None, + indexes: Indexes[Index] | Sequence[Indexes[Index]] | None = None, ) -> None: - # TODO(shoyer): expose indexes as a public argument in __init__ - if data_vars is None: data_vars = {} if coords is None: coords = {} + if indexes is not None and len(indexes) == 0: + create_default_indexes = False + else: + create_default_indexes = True + + if indexes is None: + indexes = [] + elif isinstance(indexes, Indexes): + indexes = [indexes] + else: + if any(not isinstance(idxs, Indexes) for idxs in indexes): + raise TypeError( + "indexes only accept one or more instances of `Indexes`" + ) + both_data_and_coords = set(data_vars) & set(coords) if both_data_and_coords: raise ValueError( @@ -598,17 +621,32 @@ def __init__( if isinstance(coords, Dataset): coords = coords.variables - variables, coord_names, dims, indexes, _ = merge_data_and_coords( - data_vars, coords, compat="broadcast_equals" + variables, coord_names, dims, ds_indexes, _ = merge_data_and_coords( + data_vars, + coords, + compat="broadcast_equals", + create_default_indexes=create_default_indexes, ) + idx_indexes, idx_variables = merge_indexes(indexes) + + both_indexes_and_coords = set(idx_indexes) & coord_names + if both_indexes_and_coords: + raise ValueError( + f"{both_indexes_and_coords} are found in both indexes and coords" + ) + + variables.update(idx_variables) + coord_names.update(idx_variables) + ds_indexes.update(idx_indexes) + self._attrs = dict(attrs) if attrs is not None else None self._close = None self._encoding = None self._variables = variables self._coord_names = coord_names self._dims = dims - self._indexes = indexes + self._indexes = ds_indexes @classmethod def load_store(cls: type[T_Dataset], store, decoder=None) -> T_Dataset: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 
c2efcc791a1..7e2222f2464 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -567,9 +567,18 @@ def merge_coords( return variables, out_indexes -def merge_data_and_coords(data_vars, coords, compat="broadcast_equals", join="outer"): +def merge_data_and_coords( + data_vars, + coords, + compat="broadcast_equals", + join="outer", + create_default_indexes=True, +): """Used in Dataset.__init__.""" - indexes, coords = _create_indexes_from_coords(coords, data_vars) + if create_default_indexes: + indexes, coords = _create_indexes_from_coords(coords, data_vars) + else: + indexes = {} objects = [data_vars, coords] explicit_coords = coords.keys() return merge_core( @@ -581,6 +590,27 @@ def merge_data_and_coords(data_vars, coords, compat="broadcast_equals", join="ou ) +def merge_indexes( + indexes: Iterable[Indexes], +) -> tuple[dict[Hashable, Index], dict[Hashable, Variable]]: + indexes_: dict[Hashable, Index] = {} + variables_: dict[Hashable, Variable] = {} + + duplicates = set() + + for idxs in indexes: + for k, v in idxs.items(): + if k in indexes: + duplicates.add(k) + indexes_[k] = v + variables_[k] = v.variables[k] + + if duplicates: + raise ValueError(f"found duplicate indexes {duplicates}") + + return indexes_, variables_ + + def _create_indexes_from_coords(coords, data_vars=None): """Maybe create default indexes from a mapping of coordinates. 
From 4baa8af3d8f877f00464e8d7962bd549da8da0a5 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 25 Oct 2022 16:13:54 +0200 Subject: [PATCH 02/69] make indexes arg public for DataArray.__init__ --- xarray/core/dataarray.py | 56 ++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8d971c53917..3bf5f24078c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -43,7 +43,7 @@ isel_indexes, ) from .indexing import is_fancy_indexer, map_index_queries -from .merge import PANDAS_TYPES, MergeError, _create_indexes_from_coords +from .merge import PANDAS_TYPES, MergeError, _create_indexes_from_coords, merge_indexes from .options import OPTIONS, _get_keep_attrs from .utils import ( Default, @@ -288,6 +288,11 @@ class DataArray( attrs : dict_like or None, optional Attributes to assign to the new instance. By default, an empty attribute dictionary is initialized. + indexes : py:class:`~xarray.Indexes` or list of py:class`~xarray.Indexes`, optional + One or more collections of Xarray-compatible indexes and their + coordinates variables. Provide an empty list or collection if you + want to skip the creation of default (pandas) indexes for dimension + coordinates. 
Examples -------- @@ -376,21 +381,20 @@ def __init__( dims: Hashable | Sequence[Hashable] | None = None, name: Hashable = None, attrs: Mapping = None, + indexes: Indexes[Index] + | Sequence[Indexes[Index]] + | dict[Hashable, Index] + | None = None, # internal parameters - indexes: dict[Hashable, Index] = None, fastpath: bool = False, ) -> None: if fastpath: variable = data assert dims is None assert attrs is None - assert indexes is not None + assert isinstance(indexes, dict) + da_indexes = indexes else: - # TODO: (benbovy - explicit indexes) remove - # once it becomes part of the public interface - if indexes is not None: - raise ValueError("Providing explicit indexes is not supported yet") - # try to fill in arguments from data if they weren't supplied if coords is None: @@ -410,21 +414,47 @@ def __init__( if attrs is None and not isinstance(data, PANDAS_TYPES): attrs = getattr(data, "attrs", None) + if indexes is not None and len(indexes) == 0: + create_default_indexes = False + else: + create_default_indexes = True + + if indexes is None: + indexes = [] + elif isinstance(indexes, Indexes): + indexes = [indexes] + else: + if any(not isinstance(idxs, Indexes) for idxs in indexes): + raise TypeError( + "indexes only accept one or more instances of `Indexes`" + ) + data = _check_data_shape(data, coords, dims) data = as_compatible_data(data) coords, dims = _infer_coords_and_dims(data.shape, coords, dims) variable = Variable(dims, data, attrs, fastpath=True) - indexes, coords = _create_indexes_from_coords(coords) + if create_default_indexes: + da_indexes, coords = _create_indexes_from_coords(coords) + else: + da_indexes = {} + + idx_indexes, idx_variables = merge_indexes(cast(Sequence[Indexes], indexes)) + + both_indexes_and_coords = set(idx_indexes) & set(coords) + if both_indexes_and_coords: + raise ValueError( + f"{both_indexes_and_coords} are found in both indexes and coords" + ) + + coords.update(idx_variables) + da_indexes.update(idx_indexes) # These fully 
describe a DataArray self._variable = variable assert isinstance(coords, dict) self._coords = coords self._name = name - - # TODO(shoyer): document this argument, once it becomes part of the - # public interface. - self._indexes = indexes # type: ignore[assignment] + self._indexes = da_indexes self._close = None From dbc058af5cb14b3342ea3d3f77106296bfff0db9 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 11:53:16 +0200 Subject: [PATCH 03/69] Indexes constructor updates - easily create an empty Indexes collection - check consistency between indexes and variables --- xarray/core/indexes.py | 15 +++++++++++++-- xarray/core/merge.py | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a18322fe06b..32aed28c0d3 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1100,8 +1100,8 @@ class Indexes(collections.abc.Mapping, Generic[T_PandasOrXarrayIndex]): def __init__( self, - indexes: dict[Any, T_PandasOrXarrayIndex], - variables: dict[Any, Variable], + indexes: dict[Any, T_PandasOrXarrayIndex] | None = None, + variables: dict[Any, Variable] | None = None, ): """Constructor not for public consumption. @@ -1113,6 +1113,17 @@ def __init__( Indexed coordinate variables in this object. 
""" + if indexes is None: + indexes = {} + if variables is None: + variables = {} + + unmatched_keys = set(indexes) ^ set(variables) + if unmatched_keys: + raise ValueError( + f"unmatched keys found in indexes and variables: {unmatched_keys}" + ) + self._indexes = indexes self._variables = variables diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 7e2222f2464..becae345fd2 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -586,7 +586,7 @@ def merge_data_and_coords( compat, join, explicit_coords=explicit_coords, - indexes=Indexes(indexes, coords), + indexes=Indexes(indexes, {k: coords[k] for k in indexes}), ) From 16a9983ce7d03f2e0d8ebe5178d263b81f430859 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 11:57:30 +0200 Subject: [PATCH 04/69] use the generic Mapping[Any, Index] for indexes --- xarray/core/dataarray.py | 53 ++++++++++++++++++---------------------- xarray/core/dataset.py | 41 +++++++++++++------------------ xarray/core/merge.py | 26 +++----------------- 3 files changed, 45 insertions(+), 75 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3bf5f24078c..b090f37988e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -43,7 +43,7 @@ isel_indexes, ) from .indexing import is_fancy_indexer, map_index_queries -from .merge import PANDAS_TYPES, MergeError, _create_indexes_from_coords, merge_indexes +from .merge import PANDAS_TYPES, MergeError, _create_indexes_from_coords from .options import OPTIONS, _get_keep_attrs from .utils import ( Default, @@ -288,11 +288,11 @@ class DataArray( attrs : dict_like or None, optional Attributes to assign to the new instance. By default, an empty attribute dictionary is initialized. - indexes : py:class:`~xarray.Indexes` or list of py:class`~xarray.Indexes`, optional - One or more collections of Xarray-compatible indexes and their - coordinates variables. 
Provide an empty list or collection if you - want to skip the creation of default (pandas) indexes for dimension - coordinates. + indexes : py:class:`~xarray.Indexes` or dict-like, optional + A collection of :py:class:`~xarray.indexes.Index` objects and + their coordinates variables. If an empty collection is given, + it will skip the creation of default (pandas) indexes for + dimension coordinates. Examples -------- @@ -381,10 +381,7 @@ def __init__( dims: Hashable | Sequence[Hashable] | None = None, name: Hashable = None, attrs: Mapping = None, - indexes: Indexes[Index] - | Sequence[Indexes[Index]] - | dict[Hashable, Index] - | None = None, + indexes: Mapping[Any, Index] | None = None, # internal parameters fastpath: bool = False, ) -> None: @@ -394,6 +391,7 @@ def __init__( assert attrs is None assert isinstance(indexes, dict) da_indexes = indexes + da_coords = coords else: # try to fill in arguments from data if they weren't supplied if coords is None: @@ -414,47 +412,44 @@ def __init__( if attrs is None and not isinstance(data, PANDAS_TYPES): attrs = getattr(data, "attrs", None) - if indexes is not None and len(indexes) == 0: + if indexes is None: + create_default_indexes = True + indexes = Indexes() + elif len(indexes) == 0: create_default_indexes = False + indexes = Indexes() else: create_default_indexes = True - - if indexes is None: - indexes = [] - elif isinstance(indexes, Indexes): - indexes = [indexes] - else: - if any(not isinstance(idxs, Indexes) for idxs in indexes): + if not isinstance(indexes, Indexes): raise TypeError( - "indexes only accept one or more instances of `Indexes`" + "non-empty indexes must be an instance of `Indexes`" ) data = _check_data_shape(data, coords, dims) data = as_compatible_data(data) - coords, dims = _infer_coords_and_dims(data.shape, coords, dims) + da_coords, dims = _infer_coords_and_dims(data.shape, coords, dims) variable = Variable(dims, data, attrs, fastpath=True) + if create_default_indexes: - da_indexes, coords = 
_create_indexes_from_coords(coords) + da_indexes, da_coords = _create_indexes_from_coords(da_coords) else: da_indexes = {} - idx_indexes, idx_variables = merge_indexes(cast(Sequence[Indexes], indexes)) - - both_indexes_and_coords = set(idx_indexes) & set(coords) + both_indexes_and_coords = set(indexes) & set(da_coords) if both_indexes_and_coords: raise ValueError( f"{both_indexes_and_coords} are found in both indexes and coords" ) - coords.update(idx_variables) - da_indexes.update(idx_indexes) + da_coords.update(indexes.variables) + da_indexes.update(indexes) # These fully describe a DataArray self._variable = variable - assert isinstance(coords, dict) - self._coords = coords + assert isinstance(da_coords, dict) + self._coords = da_coords self._name = name - self._indexes = da_indexes + self._indexes = da_indexes # type: ignore[assignment] self._close = None diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 241fb553902..20332e85025 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -64,7 +64,6 @@ dataset_update_method, merge_coordinates_without_align, merge_data_and_coords, - merge_indexes, ) from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs @@ -495,11 +494,11 @@ class Dataset( attrs : dict-like, optional Global attributes to save on this dataset. - indexes : py:class:`~xarray.Indexes` or list of py:class`~xarray.Indexes`, optional - One or more collections of Xarray-compatible indexes and their - coordinates variables. Provide an empty list or collection if you - want to skip the creation of default (pandas) indexes for dimension - coordinates. + indexes : py:class:`~xarray.Indexes` or dict-like, optional + A collection of :py:class:`~xarray.indexes.Index` objects and + their coordinates variables. If an empty collection is given, + it will skip the creation of default (pandas) indexes for + dimension coordinates. 
Examples -------- @@ -590,27 +589,23 @@ def __init__( data_vars: Mapping[Any, Any] | None = None, coords: Mapping[Any, Any] | None = None, attrs: Mapping[Any, Any] | None = None, - indexes: Indexes[Index] | Sequence[Indexes[Index]] | None = None, + indexes: Mapping[Any, Index] | None = None, ) -> None: if data_vars is None: data_vars = {} if coords is None: coords = {} - if indexes is not None and len(indexes) == 0: + if indexes is None: + create_default_indexes = True + indexes = Indexes() + elif len(indexes) == 0: create_default_indexes = False + indexes = Indexes() else: create_default_indexes = True - - if indexes is None: - indexes = [] - elif isinstance(indexes, Indexes): - indexes = [indexes] - else: - if any(not isinstance(idxs, Indexes) for idxs in indexes): - raise TypeError( - "indexes only accept one or more instances of `Indexes`" - ) + if not isinstance(indexes, Indexes): + raise TypeError("non-empty indexes must be an instance of `Indexes`") both_data_and_coords = set(data_vars) & set(coords) if both_data_and_coords: @@ -628,17 +623,15 @@ def __init__( create_default_indexes=create_default_indexes, ) - idx_indexes, idx_variables = merge_indexes(indexes) - - both_indexes_and_coords = set(idx_indexes) & coord_names + both_indexes_and_coords = set(indexes) & coord_names if both_indexes_and_coords: raise ValueError( f"{both_indexes_and_coords} are found in both indexes and coords" ) - variables.update(idx_variables) - coord_names.update(idx_variables) - ds_indexes.update(idx_indexes) + variables.update(indexes.variables) + coord_names.update(indexes.variables) + ds_indexes.update(indexes) self._attrs = dict(attrs) if attrs is not None else None self._close = None diff --git a/xarray/core/merge.py b/xarray/core/merge.py index becae345fd2..03b9adbe693 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -23,6 +23,7 @@ from .indexes import ( Index, Indexes, + PandasIndex, create_default_index_implicit, filter_indexes_from_coords, indexes_equal, 
@@ -590,28 +591,9 @@ def merge_data_and_coords( ) -def merge_indexes( - indexes: Iterable[Indexes], -) -> tuple[dict[Hashable, Index], dict[Hashable, Variable]]: - indexes_: dict[Hashable, Index] = {} - variables_: dict[Hashable, Variable] = {} - - duplicates = set() - - for idxs in indexes: - for k, v in idxs.items(): - if k in indexes: - duplicates.add(k) - indexes_[k] = v - variables_[k] = v.variables[k] - - if duplicates: - raise ValueError(f"found duplicate indexes {duplicates}") - - return indexes_, variables_ - - -def _create_indexes_from_coords(coords, data_vars=None): +def _create_indexes_from_coords( + coords: Mapping[Any, Variable], data_vars: Mapping[Any, Variable] | None = None +) -> tuple[dict[Any, PandasIndex], dict[Any, Variable]]: """Maybe create default indexes from a mapping of coordinates. Return those indexes and updated coordinates. From 3c076d5b0b9f502b497ac2abd90dbeb52915f4a9 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 13:15:23 +0200 Subject: [PATCH 05/69] add wrap_pandas_multiindex function --- xarray/core/indexes.py | 30 ++++++++++++++++++++++++++++++ xarray/indexes/__init__.py | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 32aed28c0d3..4c1a758ac95 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1505,3 +1505,33 @@ def assert_no_index_corrupted( f"the following index built from coordinates {index_names_str}:\n" f"{index}" ) + + +def wrap_pandas_multiindex(midx: pd.MultiIndex, dim: str) -> Indexes: + """Wrap a pandas multi-index as Xarray-compatible indexes + and coordinates. + + This function returns an object that can be directly assigned to a + :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` (via the + ``indexes`` argument of their constructor). + + Parameters + ---------- + midx : :py:class:`pandas.MultiIndex` + The pandas multi-index object to wrap. + dim : str + Dimension name. 
+ + Returns + ------- + indexes : :py:class`~xarray.Indexes` + An object that contains both the wrapped Xarray index and + its coordinate variables (dimension + levels). + + """ + xr_idx = PandasMultiIndex(midx, dim) + + variables = xr_idx.create_variables() + indexes = {k: xr_idx for k in variables} + + return Indexes(indexes=indexes, variables=variables) diff --git a/xarray/indexes/__init__.py b/xarray/indexes/__init__.py index 41321c9a0ff..ee5bff19143 100644 --- a/xarray/indexes/__init__.py +++ b/xarray/indexes/__init__.py @@ -2,6 +2,6 @@ DataArray objects. """ -from ..core.indexes import Index, PandasIndex, PandasMultiIndex +from ..core.indexes import Index, PandasIndex, PandasMultiIndex, wrap_pandas_multiindex -__all__ = ["Index", "PandasIndex", "PandasMultiIndex"] +__all__ = ["Index", "PandasIndex", "PandasMultiIndex", "wrap_pandas_multiindex"] From 70e7a5de1b27adb4b54615c1359527f4dbfd356e Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 13:34:18 +0200 Subject: [PATCH 06/69] do not create default indexes when not desired --- xarray/core/merge.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 03b9adbe693..a7e45601a33 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -320,6 +320,7 @@ def merge_collected( def collect_variables_and_indexes( list_of_mappings: list[DatasetLike], indexes: Mapping[Any, Any] | None = None, + create_default_indexes: bool = True, ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. 
@@ -366,7 +367,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if name in indexes: append(name, variable, indexes[name]) - elif variable.dims == (name,): + elif variable.dims == (name,) and create_default_indexes: idx, idx_vars = create_default_index_implicit(variable) append_all(idx_vars, {k: idx for k in idx_vars}) else: @@ -588,6 +589,7 @@ def merge_data_and_coords( join, explicit_coords=explicit_coords, indexes=Indexes(indexes, {k: coords[k] for k in indexes}), + create_default_indexes=create_default_indexes, ) @@ -714,6 +716,7 @@ def merge_core( explicit_coords: Sequence | None = None, indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, + create_default_indexes: bool = True, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -739,6 +742,8 @@ def merge_core( may be cast to pandas.Index objects. fill_value : scalar, optional Value to use for newly missing values + create_default_indexes : bool, optional + If True, create default (pandas) indexes for dimension coordinates. 
Returns ------- @@ -764,7 +769,9 @@ def merge_core( aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) - collected = collect_variables_and_indexes(aligned, indexes=indexes) + collected = collect_variables_and_indexes( + aligned, indexes=indexes, create_default_indexes=create_default_indexes + ) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( collected, prioritized, compat=compat, combine_attrs=combine_attrs From 00e1766ecb581808cf8ecb879d8513682578bb9d Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 13:44:02 +0200 Subject: [PATCH 07/69] fix Dataset dimensions TODO: check indexes shapes / dims for DataArray --- xarray/core/dataarray.py | 2 ++ xarray/core/dataset.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b090f37988e..e19789d4078 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -441,6 +441,8 @@ def __init__( f"{both_indexes_and_coords} are found in both indexes and coords" ) + # TODO: also check shape and dims of indexes (coordinate variables) + da_coords.update(indexes.variables) da_indexes.update(indexes) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 20332e85025..8f129ad703d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -633,6 +633,10 @@ def __init__( coord_names.update(indexes.variables) ds_indexes.update(indexes) + # re-calculate dimensions if indexes are given explicitly + if indexes: + dims = calculate_dimensions(variables) + self._attrs = dict(attrs) if attrs is not None else None self._close = None self._encoding = None From 3bf92cd1dde019f35ade59d2676685dce666069c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 17:18:31 +0200 Subject: [PATCH 08/69] copy the coordinate variables of passed indexes --- xarray/core/dataarray.py | 4 +++- xarray/core/dataset.py | 2 +- 2 
files changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e19789d4078..448187cf0a5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -443,7 +443,9 @@ def __init__( # TODO: also check shape and dims of indexes (coordinate variables) - da_coords.update(indexes.variables) + da_coords.update( + {k: v.copy(deep=False) for k, v in indexes.variables.items()} + ) da_indexes.update(indexes) # These fully describe a DataArray diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8f129ad703d..e0093fad40e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -629,7 +629,7 @@ def __init__( f"{both_indexes_and_coords} are found in both indexes and coords" ) - variables.update(indexes.variables) + variables.update({k: v.copy(deep=False) for k, v in indexes.variables.items()}) coord_names.update(indexes.variables) ds_indexes.update(indexes) From c9b63636821b892163f4c6e99e225beff030b14f Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 26 Oct 2022 18:11:55 +0200 Subject: [PATCH 09/69] DataArray: check dimensions/shape of index coords --- xarray/core/dataarray.py | 52 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 448187cf0a5..7b71368006d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -98,6 +98,32 @@ T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) +def _check_coords_dims(shape, coords, dims): + sizes = dict(zip(dims, shape)) + for k, v in coords.items(): + if any(d not in dims for d in v.dims): + raise ValueError( + f"coordinate {k} has dimensions {v.dims}, but these " + "are not a subset of the DataArray " + f"dimensions {dims}" + ) + + for d, s in zip(v.dims, v.shape): + if s != sizes[d]: + raise ValueError( + f"conflicting sizes for dimension {d!r}: " + f"length {sizes[d]} on the data but length {s} on " 
+ f"coordinate {k!r}" + ) + + if k in sizes and v.shape != (sizes[k],): + raise ValueError( + f"coordinate {k!r} is a DataArray dimension, but " + f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} " + "matching the dimension size" + ) + + def _infer_coords_and_dims( shape, coords, dims ) -> tuple[dict[Hashable, Variable], tuple[Hashable, ...]]: @@ -149,29 +175,7 @@ def _infer_coords_and_dims( var.dims = (dim,) new_coords[dim] = var.to_index_variable() - sizes = dict(zip(dims, shape)) - for k, v in new_coords.items(): - if any(d not in dims for d in v.dims): - raise ValueError( - f"coordinate {k} has dimensions {v.dims}, but these " - "are not a subset of the DataArray " - f"dimensions {dims}" - ) - - for d, s in zip(v.dims, v.shape): - if s != sizes[d]: - raise ValueError( - f"conflicting sizes for dimension {d!r}: " - f"length {sizes[d]} on the data but length {s} on " - f"coordinate {k!r}" - ) - - if k in sizes and v.shape != (sizes[k],): - raise ValueError( - f"coordinate {k!r} is a DataArray dimension, but " - f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} " - "matching the dimension size" - ) + _check_coords_dims(shape, new_coords, dims) return new_coords, dims @@ -441,7 +445,7 @@ def __init__( f"{both_indexes_and_coords} are found in both indexes and coords" ) - # TODO: also check shape and dims of indexes (coordinate variables) + _check_coords_dims(data.shape, indexes.variables, dims) da_coords.update( {k: v.copy(deep=False) for k, v in indexes.variables.items()} From 82dc5ccc2ff26e9deaeeb1722442b8ed80d729cd Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 27 Oct 2022 11:40:50 +0200 Subject: [PATCH 10/69] docstrings tweaks --- xarray/core/indexes.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 4c1a758ac95..1aefb15aca9 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1077,12 +1077,13 @@ def 
create_default_index_implicit( class Indexes(collections.abc.Mapping, Generic[T_PandasOrXarrayIndex]): - """Immutable proxy for Dataset or DataArrary indexes. + """Immutable proxy for Dataset or DataArray indexes. - Keys are coordinate names and values may correspond to either pandas or - xarray indexes. + It is a mapping where keys are coordinate names and values are either pandas + or xarray indexes. - Also provides some utility methods. + It also contains the indexed coordinate variables and provides some utility + methods. """ @@ -1110,7 +1111,8 @@ def __init__( indexes : dict Indexes held by this object. variables : dict - Indexed coordinate variables in this object. + Indexed coordinate variables in this object. Entries must + match those of `indexes`. """ if indexes is None: From a58c9d0c28d215e43cb045f2810c93c6e7292a18 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 27 Oct 2022 12:23:05 +0200 Subject: [PATCH 11/69] more Indexes safety Since its constructor can now be used publicly. Copy input mappings and check the type of input indexes. 
--- xarray/core/indexes.py | 23 ++++++++++++++++++----- xarray/tests/test_indexes.py | 7 ++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1aefb15aca9..b542da25cab 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1087,10 +1087,12 @@ class Indexes(collections.abc.Mapping, Generic[T_PandasOrXarrayIndex]): """ + _index_type: type[Index] | type[pd.Index] _indexes: dict[Any, T_PandasOrXarrayIndex] _variables: dict[Any, Variable] __slots__ = ( + "_index_type", "_indexes", "_variables", "_dims", @@ -1101,8 +1103,9 @@ class Indexes(collections.abc.Mapping, Generic[T_PandasOrXarrayIndex]): def __init__( self, - indexes: dict[Any, T_PandasOrXarrayIndex] | None = None, - variables: dict[Any, Variable] | None = None, + indexes: Mapping[Any, T_PandasOrXarrayIndex] | None = None, + variables: Mapping[Any, Variable] | None = None, + index_type: type[Index] | type[pd.Index] = Index, ): """Constructor not for public consumption. @@ -1113,6 +1116,9 @@ def __init__( variables : dict Indexed coordinate variables in this object. Entries must match those of `indexes`. + index_type : type + The type of all indexes, i.e., either :py:class:`xarray.indexes.Index` + or :py:class:`pandas.Index`. 
""" if indexes is None: @@ -1126,8 +1132,15 @@ def __init__( f"unmatched keys found in indexes and variables: {unmatched_keys}" ) - self._indexes = indexes - self._variables = variables + if any(not isinstance(idx, index_type) for idx in indexes.values()): + index_type_str = f"{index_type.__module__}.{index_type.__name__}" + raise TypeError( + f"values of indexes must all be instances of {index_type_str}" + ) + + self._index_type = index_type + self._indexes = dict(**indexes) + self._variables = dict(**variables) self._dims: Mapping[Hashable, int] | None = None self.__coord_name_id: dict[Any, int] | None = None @@ -1275,7 +1288,7 @@ def to_pandas_indexes(self) -> Indexes[pd.Index]: elif isinstance(idx, Index): indexes[k] = idx.to_pandas_index() - return Indexes(indexes, self._variables) + return Indexes(indexes, self._variables, index_type=pd.Index) def copy_indexes( self, deep: bool = True diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index f9ca1d1bc2e..5bc82136f98 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -584,7 +584,12 @@ def indexes( _, variables = indexes_and_vars - return Indexes(indexes, variables) + if isinstance(x_idx, Index): + index_type = Index + else: + index_type = pd.Index + + return Indexes(indexes, variables, index_type=index_type) def test_interface(self, unique_indexes, indexes) -> None: x_idx = unique_indexes[0] From 9beeea748b9cbaa764a80b5d80df7454aa7833d2 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 27 Oct 2022 12:25:53 +0200 Subject: [PATCH 12/69] ensure input indexes are Xarray indexes --- xarray/core/dataarray.py | 2 ++ xarray/core/dataset.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7b71368006d..bd652afffe7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -428,6 +428,8 @@ def __init__( raise TypeError( "non-empty indexes must be an instance of `Indexes`" ) + elif 
indexes._index_type != Index: + raise TypeError("indexes must only contain Xarray `Index` objects") data = _check_data_shape(data, coords, dims) data = as_compatible_data(data) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e0093fad40e..46e6eb676be 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -606,6 +606,8 @@ def __init__( create_default_indexes = True if not isinstance(indexes, Indexes): raise TypeError("non-empty indexes must be an instance of `Indexes`") + elif indexes._index_type != Index: + raise TypeError("indexes must only contain Xarray `Index` objects") both_data_and_coords = set(data_vars) & set(coords) if both_data_and_coords: From c6e94b46363b79cb24c94c6ef74c7a2b8a48d9e3 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 27 Oct 2022 15:25:32 +0200 Subject: [PATCH 13/69] add .assign_indexes() method --- xarray/core/dataarray.py | 22 ++++++++++++++++++++++ xarray/core/dataset.py | 24 ++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index bd652afffe7..075cec36b9e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3490,6 +3490,28 @@ def reduce( var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, **kwargs) return self._replace_maybe_drop_dims(var) + def assign_indexes(self, indexes: Indexes[Index]): + """Assign new indexes to this dataarray. + + Returns a new dataarray with all the original data in addition to the new + indexes (and their corresponding coordinates). + + Parameters + ---------- + indexes : :py:class:`~xarray.Indexes`. + A collection of :py:class:`~xarray.indexes.Index` objects + to assign (including their coordinate variables). + + Returns + ------- + assigned : DataArray + A new dataarray with the new indexes and coordinates in addition to + the existing data. 
+ """ + # TODO: check indexes.dims must be a subset of self.dims + ds = self._to_temp_dataset().assign_indexes(indexes) + return self._from_temp_dataset(ds) + def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame: """Convert this array into a pandas object with the same shape. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 46e6eb676be..7c726797875 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6089,6 +6089,30 @@ def assign( data.update(results) return data + def assign_indexes(self, indexes: Indexes[Index]): + """Assign new indexes to this dataset. + + Returns a new dataset with all the original data in addition to the new + indexes (and their corresponding coordinates). + + Parameters + ---------- + indexes : :py:class:`~xarray.Indexes`. + A collection of :py:class:`~xarray.indexes.Index` objects + to assign (including their coordinate variables). + + Returns + ------- + assigned : Dataset + A new dataset with the new indexes and coordinates in addition to + the existing data. + """ + ds_indexes = Dataset(indexes=indexes) + dropped = self.drop_vars(indexes, errors="ignore") + return dropped.merge( + ds_indexes, compat="minimal", join="override", combine_attrs="no_conflicts" + ) + def to_array( self, dim: Hashable = "variable", name: Hashable | None = None ) -> DataArray: From f97adb5186057b2d840d2f77255b8a4ff30fbb7a Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 8 Dec 2022 17:31:28 +0100 Subject: [PATCH 14/69] add `IndexedCoordinates` subclass + add `IndexedCoordinates.from_pandas_multiindex` helper. 
--- xarray/__init__.py | 2 + xarray/core/coordinates.py | 115 +++++++++++++++++++++++++++++++++++++ xarray/core/indexes.py | 30 ---------- xarray/indexes/__init__.py | 9 +-- 4 files changed, 119 insertions(+), 37 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index d064502c20b..1e75a947320 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -27,6 +27,7 @@ where, ) from xarray.core.concat import concat +from xarray.core.coordinates import IndexedCoordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.extensions import ( @@ -102,6 +103,7 @@ "Coordinate", "DataArray", "Dataset", + "IndexedCoordinates", "IndexVariable", "Variable", # Exceptions diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 986b4cab443..3822e98c6a6 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -482,3 +482,118 @@ def assert_coordinate_consistent( f"dimension coordinate {k!r} conflicts between " f"indexed and indexing objects:\n{obj[k]}\nvs.\n{coords[k]}" ) + + +class IndexedCoordinates(Coordinates): + """Dictionary like container for indexed coordinates. + + Essentially an immutable dictionary with keys given by variable + names and the values given by the corresponding xarray.Variable + objects. + + All coordinate variables in this collection are backed by one or more Xarray + indexes. + + This collection can be passed directly to the :py:class:`~xarray.Dataset` + and :py:class:`~xarray.DataArray` constructors via their `coords` argument. + This will add both the coordinates and their index. 
+ """ + + _data: Dataset + _indexes: Indexes[Index] + + __slots__ = ("_data", "_indexes") + + def __init__( + self, + indexes: Mapping[Any, Index] | None = None, + variables: Mapping[Any, Variable] | None = None, + ): + self._indexes = Indexes(indexes, variables) + self._data = self._to_dataset() + + @classmethod + def from_pandas_multiindex( + cls, midx: pd.MultiIndex, dim: str + ) -> IndexedCoordinates: + """Wrap a pandas multi-index as Xarray-compatible indexes + and coordinates. + + This function returns an object that + + Parameters + ---------- + midx : :py:class:`pandas.MultiIndex` + Pandas multi-index object. + dim : str + Dimension name. + + Returns + ------- + coords : :py:class`~xarray.IndexedCoordinates` + A collection of Xarray indexed coordinates created from the multi-index. + The returned coordinates can be directly assigned to a + :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` (via the + ``coords`` argument of their constructor). + """ + xr_idx = PandasMultiIndex(midx, dim) + + variables = xr_idx.create_variables() + indexes = {k: xr_idx for k in variables} + + return cls(indexes=indexes, variables=variables) + + @property + def _names(self) -> set[Hashable]: + return self._data._coord_names + + @property + def dims(self) -> Mapping[Hashable, int]: + return self._data.dims + + @property + def dtypes(self) -> Frozen[Hashable, np.dtype]: + """Mapping from coordinate names to dtypes. + + Cannot be modified directly. 
+ + See Also + -------- + Dataset.dtypes + """ + return Frozen({n: v.dtype for n, v in self._data.variables.items()}) + + @property + def variables(self) -> Mapping[Hashable, Variable]: + return self._data.variables + + def _to_dataset(self) -> Dataset: + """Convert these coordinates into a new Dataset""" + from xarray.core.dataset import Dataset + + return Dataset._construct_direct( + coord_names=set(self._indexes), + dims=dict(self._indexes.dims), + variables=self._indexes._variables, + indexes=self._indexes._indexes, + ) + + def to_dataset(self) -> Dataset: + return self._data.copy() + + def __getitem__(self, key: Hashable) -> DataArray: + return self._data[key] + + def update(self, other: Mapping[Any, Any]) -> None: + raise TypeError( + "IndexedCoordinates is immutable and can not be modified inplace" + ) + + def __delitem__(self, key: Hashable) -> None: + raise TypeError( + "IndexedCoordinates is immutable and can not be modified inplace" + ) + + def _ipython_key_completions_(self): + """Provide method for the key-autocompletions in IPython.""" + return self._data._ipython_key_completions_() diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 29dd675480e..15fdefa6974 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1540,33 +1540,3 @@ def assert_no_index_corrupted( f"the following index built from coordinates {index_names_str}:\n" f"{index}" ) - - -def wrap_pandas_multiindex(midx: pd.MultiIndex, dim: str) -> Indexes: - """Wrap a pandas multi-index as Xarray-compatible indexes - and coordinates. - - This function returns an object that can be directly assigned to a - :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` (via the - ``indexes`` argument of their constructor). - - Parameters - ---------- - midx : :py:class:`pandas.MultiIndex` - The pandas multi-index object to wrap. - dim : str - Dimension name. 
- - Returns - ------- - indexes : :py:class`~xarray.Indexes` - An object that contains both the wrapped Xarray index and - its coordinate variables (dimension + levels). - - """ - xr_idx = PandasMultiIndex(midx, dim) - - variables = xr_idx.create_variables() - indexes = {k: xr_idx for k in variables} - - return Indexes(indexes=indexes, variables=variables) diff --git a/xarray/indexes/__init__.py b/xarray/indexes/__init__.py index 2e114e9854e..143d7a58fda 100644 --- a/xarray/indexes/__init__.py +++ b/xarray/indexes/__init__.py @@ -2,11 +2,6 @@ DataArray objects. """ -from xarray.core.indexes import ( - Index, - PandasIndex, - PandasMultiIndex, - wrap_pandas_multiindex, -) +from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex -__all__ = ["Index", "PandasIndex", "PandasMultiIndex", "wrap_pandas_multiindex"] +__all__ = ["Index", "PandasIndex", "PandasMultiIndex"] From 45709efb51c42da20a71e1ceabb115dcab3bc2da Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 8 Dec 2022 17:32:48 +0100 Subject: [PATCH 15/69] rollback/update Dataset and DataArray constructors Drop the `indexes` argument or keep it as private API. When a `Coordinates` object is passed as `coords` argument, extract both coordinate variables and indexes and add them to the new Dataset or DataArray. 
--- xarray/core/dataarray.py | 107 +++++++++++++-------------------------- xarray/core/dataset.py | 73 ++++---------------------- xarray/core/merge.py | 24 ++++----- 3 files changed, 56 insertions(+), 148 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index da45e0ac1d8..662aafedb7f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -34,7 +34,11 @@ from xarray.core.arithmetic import DataArrayArithmetic from xarray.core.common import AbstractArray, DataWithCoords, get_chunksizes from xarray.core.computation import unify_chunks -from xarray.core.coordinates import DataArrayCoordinates, assert_coordinate_consistent +from xarray.core.coordinates import ( + Coordinates, + DataArrayCoordinates, + assert_coordinate_consistent, +) from xarray.core.dataset import Dataset from xarray.core.formatting import format_item from xarray.core.indexes import ( @@ -136,7 +140,7 @@ def _check_coords_dims(shape, coords, dims): def _infer_coords_and_dims( shape, coords, dims -) -> tuple[dict[Hashable, Variable], tuple[Hashable, ...]]: +) -> tuple[Mapping[Hashable, Any], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" if ( @@ -174,16 +178,20 @@ def _infer_coords_and_dims( if not isinstance(d, str): raise TypeError(f"dimension {d} is not a string") - new_coords: dict[Hashable, Variable] = {} + new_coords: Mapping[Hashable, Any] - if utils.is_dict_like(coords): - for k, v in coords.items(): - new_coords[k] = as_variable(v, name=k) - elif coords is not None: - for dim, coord in zip(dims, coords): - var = as_variable(coord, name=dim) - var.dims = (dim,) - new_coords[dim] = var.to_index_variable() + if isinstance(coords, Coordinates): + new_coords = coords + else: + new_coords = {} + if utils.is_dict_like(coords): + for k, v in coords.items(): + new_coords[k] = as_variable(v, name=k) + elif coords is not None: + for dim, coord in zip(dims, coords): + var = as_variable(coord, name=dim) + var.dims = (dim,) + 
new_coords[dim] = var.to_index_variable() _check_coords_dims(shape, new_coords, dims) @@ -398,18 +406,22 @@ def __init__( dims: Hashable | Sequence[Hashable] | None = None, name: Hashable | None = None, attrs: Mapping | None = None, - indexes: Mapping[Any, Index] | None = None, # internal parameters + indexes: Mapping[Any, Index] | None = None, fastpath: bool = False, ) -> None: if fastpath: variable = data assert dims is None assert attrs is None - assert isinstance(indexes, dict) - da_indexes = indexes - da_coords = coords + assert indexes is not None else: + if indexes is not None: + raise ValueError( + "Explicitly passing indexes via the `indexes` argument is not supported " + "when `fastpath=False`. Use the `coords` argument instead." + ) + # try to fill in arguments from data if they weren't supplied if coords is None: @@ -429,50 +441,23 @@ def __init__( if attrs is None and not isinstance(data, PANDAS_TYPES): attrs = getattr(data, "attrs", None) - if indexes is None: - create_default_indexes = True - indexes = Indexes() - elif len(indexes) == 0: - create_default_indexes = False - indexes = Indexes() - else: - create_default_indexes = True - if not isinstance(indexes, Indexes): - raise TypeError( - "non-empty indexes must be an instance of `Indexes`" - ) - elif indexes._index_type != Index: - raise TypeError("indexes must only contain Xarray `Index` objects") - data = _check_data_shape(data, coords, dims) data = as_compatible_data(data) - da_coords, dims = _infer_coords_and_dims(data.shape, coords, dims) + coords, dims = _infer_coords_and_dims(data.shape, coords, dims) variable = Variable(dims, data, attrs, fastpath=True) - if create_default_indexes: - da_indexes, da_coords = _create_indexes_from_coords(da_coords) + if isinstance(coords, Coordinates): + indexes = dict(coords.xindexes) + coords = {k: v.copy() for k, v in coords.variables.items()} else: - da_indexes = {} - - both_indexes_and_coords = set(indexes) & set(da_coords) - if both_indexes_and_coords: - 
raise ValueError( - f"{both_indexes_and_coords} are found in both indexes and coords" - ) - - _check_coords_dims(data.shape, indexes.variables, dims) - - da_coords.update( - {k: v.copy(deep=False) for k, v in indexes.variables.items()} - ) - da_indexes.update(indexes) + indexes, coords = _create_indexes_from_coords(coords) # These fully describe a DataArray self._variable = variable - assert isinstance(da_coords, dict) - self._coords = da_coords + assert isinstance(coords, dict) + self._coords = coords self._name = name - self._indexes = da_indexes # type: ignore[assignment] + self._indexes = indexes # type: ignore[assignment] self._close = None @@ -3702,28 +3687,6 @@ def reduce( var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, **kwargs) return self._replace_maybe_drop_dims(var) - def assign_indexes(self, indexes: Indexes[Index]): - """Assign new indexes to this dataarray. - - Returns a new dataarray with all the original data in addition to the new - indexes (and their corresponding coordinates). - - Parameters - ---------- - indexes : :py:class:`~xarray.Indexes`. - A collection of :py:class:`~xarray.indexes.Index` objects - to assign (including their coordinate variables). - - Returns - ------- - assigned : DataArray - A new dataarray with the new indexes and coordinates in addition to - the existing data. - """ - # TODO: check indexes.dims must be a subset of self.dims - ds = self._to_temp_dataset().assign_indexes(indexes) - return self._from_temp_dataset(ds) - def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame: """Convert this array into a pandas object with the same shape. 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a9ebe6c1470..33538c8aa57 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -454,8 +454,12 @@ class Dataset( By default, pandas indexes are created for one dimensional variables with name equal to their dimension so those variables can be used as coordinates - for label based indexing. Xarray-compatible indexes may also be provided - via the `indexes` argument. + for label based indexing. When an Xarray ``Coordinates`` object is passed to + ``coords``, any existing index(es) built from those coordinates will be + added to the Dataset (such ``Coordinates`` objects are returned by the + :py:attr:`~xarray.Dataset.coords` and :py:attr:`~xarray.DataArray.coords` + properties or may be created directly, e.g., with + :py:meth:`~xarray.IndexedCoordinates.from_pandas_multiindex`). To load data from a file or file-like object, use the `open_dataset` function. @@ -488,8 +492,8 @@ class Dataset( varying/measured/dependent quantities that belong in `variables`. Coordinates values may be given by 1-dimensional arrays or scalars, in which case `dims` do not need to be - supplied: 1D arrays will be assumed to give index values along - the dimension with the same name. + supplied: by default 1D arrays will be assumed to give index + values along the dimension with the same name. 
The following notations are accepted: @@ -601,26 +605,12 @@ def __init__( data_vars: Mapping[Any, Any] | None = None, coords: Mapping[Any, Any] | None = None, attrs: Mapping[Any, Any] | None = None, - indexes: Mapping[Any, Index] | None = None, ) -> None: if data_vars is None: data_vars = {} if coords is None: coords = {} - if indexes is None: - create_default_indexes = True - indexes = Indexes() - elif len(indexes) == 0: - create_default_indexes = False - indexes = Indexes() - else: - create_default_indexes = True - if not isinstance(indexes, Indexes): - raise TypeError("non-empty indexes must be an instance of `Indexes`") - elif indexes._index_type != Index: - raise TypeError("indexes must only contain Xarray `Index` objects") - both_data_and_coords = set(data_vars) & set(coords) if both_data_and_coords: raise ValueError( @@ -630,34 +620,17 @@ def __init__( if isinstance(coords, Dataset): coords = coords.variables - variables, coord_names, dims, ds_indexes, _ = merge_data_and_coords( - data_vars, - coords, - compat="broadcast_equals", - create_default_indexes=create_default_indexes, + variables, coord_names, dims, indexes, _ = merge_data_and_coords( + data_vars, coords, compat="broadcast_equals" ) - both_indexes_and_coords = set(indexes) & coord_names - if both_indexes_and_coords: - raise ValueError( - f"{both_indexes_and_coords} are found in both indexes and coords" - ) - - variables.update({k: v.copy(deep=False) for k, v in indexes.variables.items()}) - coord_names.update(indexes.variables) - ds_indexes.update(indexes) - - # re-calculate dimensions if indexes are given explicitly - if indexes: - dims = calculate_dimensions(variables) - self._attrs = dict(attrs) if attrs is not None else None self._close = None self._encoding = None self._variables = variables self._coord_names = coord_names self._dims = dims - self._indexes = ds_indexes + self._indexes = indexes @classmethod def load_store(cls: type[T_Dataset], store, decoder=None) -> T_Dataset: @@ -6117,30 
+6090,6 @@ def assign( data.update(results) return data - def assign_indexes(self, indexes: Indexes[Index]): - """Assign new indexes to this dataset. - - Returns a new dataset with all the original data in addition to the new - indexes (and their corresponding coordinates). - - Parameters - ---------- - indexes : :py:class:`~xarray.Indexes`. - A collection of :py:class:`~xarray.indexes.Index` objects - to assign (including their coordinate variables). - - Returns - ------- - assigned : Dataset - A new dataset with the new indexes and coordinates in addition to - the existing data. - """ - ds_indexes = Dataset(indexes=indexes) - dropped = self.drop_vars(indexes, errors="ignore") - return dropped.merge( - ds_indexes, compat="minimal", join="override", combine_attrs="no_conflicts" - ) - def to_array( self, dim: Hashable = "variable", name: Hashable | None = None ) -> DataArray: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index b04fcf57a52..b2b838f489d 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -320,7 +320,6 @@ def merge_collected( def collect_variables_and_indexes( list_of_mappings: list[DatasetLike], indexes: Mapping[Any, Any] | None = None, - create_default_indexes: bool = True, ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. 
@@ -367,7 +366,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if name in indexes: append(name, variable, indexes[name]) - elif variable.dims == (name,) and create_default_indexes: + elif variable.dims == (name,): idx, idx_vars = create_default_index_implicit(variable) append_all(idx_vars, {k: idx for k in idx_vars}) else: @@ -574,13 +573,16 @@ def merge_data_and_coords( coords, compat="broadcast_equals", join="outer", - create_default_indexes=True, ): """Used in Dataset.__init__.""" - if create_default_indexes: - indexes, coords = _create_indexes_from_coords(coords, data_vars) + from xarray.core.coordinates import Coordinates + + if isinstance(coords, Coordinates): + indexes = coords.xindexes + coords = coords.variables else: - indexes = {} + indexes, coords = _create_indexes_from_coords(coords, data_vars) + objects = [data_vars, coords] explicit_coords = coords.keys() return merge_core( @@ -588,8 +590,7 @@ def merge_data_and_coords( compat, join, explicit_coords=explicit_coords, - indexes=Indexes(indexes, {k: coords[k] for k in indexes}), - create_default_indexes=create_default_indexes, + indexes=Indexes(indexes, coords), ) @@ -716,7 +717,6 @@ def merge_core( explicit_coords: Sequence | None = None, indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, - create_default_indexes: bool = True, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -742,8 +742,6 @@ def merge_core( may be cast to pandas.Index objects. fill_value : scalar, optional Value to use for newly missing values - create_default_indexes : bool, optional - If True, create default (pandas) indexes for dimension coordinates. 
Returns ------- @@ -769,9 +767,7 @@ def merge_core( aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) - collected = collect_variables_and_indexes( - aligned, indexes=indexes, create_default_indexes=create_default_indexes - ) + collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( collected, prioritized, compat=compat, combine_attrs=combine_attrs From 4c559f16ed39aa7f5c4deb37ff1762fd30c919b0 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 8 Dec 2022 18:04:30 +0100 Subject: [PATCH 16/69] update docstrings --- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 662aafedb7f..3f433c2fb8c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -314,10 +314,10 @@ class DataArray( Attributes to assign to the new instance. By default, an empty attribute dictionary is initialized. indexes : py:class:`~xarray.Indexes` or dict-like, optional - A collection of :py:class:`~xarray.indexes.Index` objects and - their coordinates variables. If an empty collection is given, - it will skip the creation of default (pandas) indexes for - dimension coordinates. + For internal use only. For passing index objects to the + new DataArray, use the ``coords`` argument instead with an + Xarray ``Coordinates`` object (both coordinate variables and + indexes will be extracted from the latter). Examples -------- diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 33538c8aa57..fac7eba4f72 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -510,11 +510,6 @@ class Dataset( attrs : dict-like, optional Global attributes to save on this dataset. 
- indexes : py:class:`~xarray.Indexes` or dict-like, optional - A collection of :py:class:`~xarray.indexes.Index` objects and - their coordinates variables. If an empty collection is given, - it will skip the creation of default (pandas) indexes for - dimension coordinates. Examples -------- From 1192948402a7b13e3fb6523566ab4a8232dac20a Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 8 Dec 2022 18:24:08 +0100 Subject: [PATCH 17/69] fix Dataset creation internal error --- xarray/core/merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index b2b838f489d..4181f985666 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -585,12 +585,13 @@ def merge_data_and_coords( objects = [data_vars, coords] explicit_coords = coords.keys() + indexed_coords = {k: v for k, v in coords.items() if k in indexes} return merge_core( objects, compat, join, explicit_coords=explicit_coords, - indexes=Indexes(indexes, coords), + indexes=Indexes(indexes, indexed_coords), ) From a877a7479e3fa55bbd6a061f39a6f3770ae62b33 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 9 Dec 2022 09:53:11 +0100 Subject: [PATCH 18/69] add IndexedCoordinates.merge_coords --- xarray/core/coordinates.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 3822e98c6a6..a78e4e5d365 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -581,6 +581,14 @@ def _to_dataset(self) -> Dataset: def to_dataset(self) -> Dataset: return self._data.copy() + def merge_coords(self, other: Mapping[Any, Any] | None = None) -> Coordinates: + from xarray.core.dataset import Dataset + + if not isinstance(other, Coordinates): + other = Dataset(coords=other).coords + + return self.merge(other).coords + def __getitem__(self, key: Hashable) -> DataArray: return self._data[key] From 9d6d2ae81ed2154364d54ab3fe2657b755e4691f Mon Sep 17 00:00:00 2001 From: Benoit Bovy 
Date: Mon, 12 Dec 2022 12:26:23 +0100 Subject: [PATCH 19/69] drop IndexedCoordinates and reuse Coordinates --- xarray/__init__.py | 4 +- xarray/core/coordinates.py | 318 +++++++++++++++++++++---------------- 2 files changed, 182 insertions(+), 140 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 1e75a947320..64522d73fdd 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -27,7 +27,7 @@ where, ) from xarray.core.concat import concat -from xarray.core.coordinates import IndexedCoordinates +from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.extensions import ( @@ -101,9 +101,9 @@ "CFTimeIndex", "Context", "Coordinate", + "Coordinates", "DataArray", "Dataset", - "IndexedCoordinates", "IndexVariable", "Variable", # Exceptions diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index a78e4e5d365..10f7b4f51db 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -2,7 +2,16 @@ import warnings from contextlib import contextmanager -from typing import TYPE_CHECKING, Any, Hashable, Iterator, List, Mapping, Sequence +from typing import ( + TYPE_CHECKING, + Any, + Generic, + Hashable, + Iterator, + List, + Mapping, + Sequence, +) import numpy as np import pandas as pd @@ -15,14 +24,14 @@ assert_no_index_corrupted, ) from xarray.core.merge import merge_coordinates_without_align, merge_coords +from xarray.core.types import T_DataArray from xarray.core.utils import Frozen, ReprObject -from xarray.core.variable import Variable, calculate_dimensions +from xarray.core.variable import Variable, as_variable, calculate_dimensions if TYPE_CHECKING: from xarray.core.common import DataWithCoords from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - from xarray.core.types import T_DataArray # Used as the key corresponding to a DataArray's variable when converting # arbitrary DataArray objects 
to datasets @@ -32,7 +41,7 @@ GenericAlias = type(List[int]) -class Coordinates(Mapping[Hashable, "T_DataArray"]): +class AbstractCoordinates(Mapping[Hashable, "T_DataArray"]): _data: DataWithCoords __slots__ = ("_data",) @@ -176,6 +185,125 @@ def update(self, other: Mapping[Any, Any]) -> None: ) self._update_coords(coords, indexes) + +class Coordinates(AbstractCoordinates): + """Dictionary like container for Xarray coordinates (variables + indexes). + + This collection can be passed directly to the :py:class:`~xarray.Dataset` + and :py:class:`~xarray.DataArray` constructors via their `coords` argument. + This will add both the coordinates variables and their index. + + Most often coordinates are returned via the :py:attr:`Dataset.coords` and + :py:attr:`DataArray.coords` properties. In occasional cases they are built + from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`). In + rare cases they are built directly from coordinate data and index objects + (beware that no consistency check is done on those inputs). + + Parameters + ---------- + coords: dict-like + Mapping of coordinate names to any object that can be converted + into a :py:class:`Variable`. + indexes: dict-like + Mapping of coordinate names to :py:class:`Index` objects. 
+ + """ + + _data: DataWithCoords + + __slots__ = ("_data",) + + def __init__(self, coords: Mapping[Any, Any], indexes: Mapping[Any, Index]): + from xarray.core.dataset import Dataset + + if isinstance(coords, Coordinates): + variables = dict(coords.variables) + else: + variables = {k: as_variable(v) for k, v in coords.items()} + + xr_indexes = {} + for k, idx in indexes.items(): + if not isinstance(idx, Index): + raise TypeError(f"'{k}' is not an Xarray Index") + xr_indexes[k] = idx + + self._data = Dataset._construct_direct( + coord_names=set(coords), variables=variables, indexes=xr_indexes + ) + + @classmethod + def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: str) -> Coordinates: + """Wrap a pandas multi-index as Xarray coordinates (dimension + levels). + + The returned coordinates can be directly assigned to a + :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` via the + ``coords`` argument of their constructor. + + Parameters + ---------- + midx : :py:class:`pandas.MultiIndex` + Pandas multi-index object. + dim : str + Dimension name. + + Returns + ------- + coords : Coordinates + A collection of Xarray indexed coordinates created from the multi-index. + + """ + xr_idx = PandasMultiIndex(midx, dim) + + variables = xr_idx.create_variables() + indexes = {k: xr_idx for k in variables} + + return cls(coords=variables, indexes=indexes) + + @property + def _names(self) -> set[Hashable]: + return self._data._coord_names + + @property + def dims(self) -> Mapping[Hashable, int] | tuple[Hashable, ...]: + return self._data.dims + + @property + def dtypes(self) -> Frozen[Hashable, np.dtype]: + """Mapping from coordinate names to dtypes. + + Cannot be modified directly. 
+ + See Also + -------- + Dataset.dtypes + """ + return Frozen({n: v.dtype for n, v in self._data.variables.items()}) + + @property + def variables(self) -> Mapping[Hashable, Variable]: + return self._data.variables + + def to_dataset(self) -> Dataset: + """Convert these coordinates into a new Dataset""" + return self._data.copy() + + def __getitem__(self, key: Hashable) -> DataArray: + return self._data[key] + + def __delitem__(self, key: Hashable) -> None: + # redirect to DatasetCoordinates.__delitem__ + del self._data.coords[key] + + def _update_coords( + self, coords: dict[Hashable, Variable], indexes: Mapping[Any, Index] + ) -> None: + # redirect to DatasetCoordinates._update_coords + self._data.coords._update_coords(coords, indexes) + + def _maybe_drop_multiindex_coords(self, coords: set[Hashable]) -> None: + # redirect to DatasetCoordinates._maybe_drop_multiindex_coords + self._data.coords._maybe_drop_multiindex_coords(coords) + def _merge_raw(self, other, reflexive): """For use with binary arithmetic.""" if other is None: @@ -205,7 +333,7 @@ def _merge_inplace(self, other): yield self._update_coords(variables, indexes) - def merge(self, other: Coordinates | None) -> Dataset: + def merge(self, other: Mapping[Any, Any] | None) -> Dataset: """Merge two sets of coordinates to create a new Dataset The method implements the logic used for joining coordinates in the @@ -219,8 +347,9 @@ def merge(self, other: Coordinates | None) -> Dataset: Parameters ---------- - other : DatasetCoordinates or DataArrayCoordinates - The coordinates from another dataset or data array. + other : dict-like, optional + A :py:class:`Coordinates` object or any mapping that can be turned + into coordinates. 
Returns ------- @@ -241,13 +370,48 @@ def merge(self, other: Coordinates | None) -> Dataset: variables=coords, coord_names=coord_names, indexes=indexes ) + def merge_coords(self, other: Mapping[Any, Any] | None = None) -> Coordinates: + """Merge two sets of coordinates to create a new :py:class:`Coordinates` + object. + + The method implements the logic used for joining coordinates in the + result of a binary operation performed on xarray objects: + + - If two index coordinates conflict (are not equal), an exception is + raised. You must align your data before passing it to this method. + - If an index coordinate and a non-index coordinate conflict, the non- + index coordinate is dropped. + - If two non-index coordinates conflict, both are dropped. + + Parameters + ---------- + other : dict-like, optional + A :py:class:`Coordinates` object or any mapping that can be turned + into coordinates. + + Returns + ------- + merged : Coordinates + A new Coordinates object with merged coordinates. + """ + from xarray.core.dataset import Dataset + + if not isinstance(other, Coordinates): + other = Dataset(coords=other).coords + + return self.merge(other).coords + + def _ipython_key_completions_(self): + """Provide method for the key-autocompletions in IPython.""" + return self._data._ipython_key_completions_() + class DatasetCoordinates(Coordinates): - """Dictionary like container for Dataset coordinates. + """Dictionary like container for Dataset coordinates (variables + indexes). - Essentially an immutable dictionary with keys given by the array's - dimensions and the values given by the corresponding xarray.Coordinate - objects. + This collection can be passed directly to the :py:class:`~xarray.Dataset` + and :py:class:`~xarray.DataArray` constructors via their `coords` argument. + This will add both the coordinates variables and their index. 
""" _data: Dataset @@ -348,11 +512,12 @@ def _ipython_key_completions_(self): ] -class DataArrayCoordinates(Coordinates["T_DataArray"]): - """Dictionary like container for DataArray coordinates. +class DataArrayCoordinates(Coordinates, Generic[T_DataArray]): + """Dictionary like container for DataArray coordinates (variables + indexes). - Essentially a dict with keys given by the array's - dimensions and the values given by corresponding DataArray objects. + This collection can be passed directly to the :py:class:`~xarray.Dataset` + and :py:class:`~xarray.DataArray` constructors via their `coords` argument. + This will add both the coordinates variables and their index. """ _data: T_DataArray @@ -482,126 +647,3 @@ def assert_coordinate_consistent( f"dimension coordinate {k!r} conflicts between " f"indexed and indexing objects:\n{obj[k]}\nvs.\n{coords[k]}" ) - - -class IndexedCoordinates(Coordinates): - """Dictionary like container for indexed coordinates. - - Essentially an immutable dictionary with keys given by variable - names and the values given by the corresponding xarray.Variable - objects. - - All coordinate variables in this collection are backed by one or more Xarray - indexes. - - This collection can be passed directly to the :py:class:`~xarray.Dataset` - and :py:class:`~xarray.DataArray` constructors via their `coords` argument. - This will add both the coordinates and their index. - """ - - _data: Dataset - _indexes: Indexes[Index] - - __slots__ = ("_data", "_indexes") - - def __init__( - self, - indexes: Mapping[Any, Index] | None = None, - variables: Mapping[Any, Variable] | None = None, - ): - self._indexes = Indexes(indexes, variables) - self._data = self._to_dataset() - - @classmethod - def from_pandas_multiindex( - cls, midx: pd.MultiIndex, dim: str - ) -> IndexedCoordinates: - """Wrap a pandas multi-index as Xarray-compatible indexes - and coordinates. 
- - This function returns an object that - - Parameters - ---------- - midx : :py:class:`pandas.MultiIndex` - Pandas multi-index object. - dim : str - Dimension name. - - Returns - ------- - coords : :py:class`~xarray.IndexedCoordinates` - A collection of Xarray indexed coordinates created from the multi-index. - The returned coordinates can be directly assigned to a - :py:class:`~xarray.Dataset` or :py:class:`~xarray.DataArray` (via the - ``coords`` argument of their constructor). - """ - xr_idx = PandasMultiIndex(midx, dim) - - variables = xr_idx.create_variables() - indexes = {k: xr_idx for k in variables} - - return cls(indexes=indexes, variables=variables) - - @property - def _names(self) -> set[Hashable]: - return self._data._coord_names - - @property - def dims(self) -> Mapping[Hashable, int]: - return self._data.dims - - @property - def dtypes(self) -> Frozen[Hashable, np.dtype]: - """Mapping from coordinate names to dtypes. - - Cannot be modified directly. - - See Also - -------- - Dataset.dtypes - """ - return Frozen({n: v.dtype for n, v in self._data.variables.items()}) - - @property - def variables(self) -> Mapping[Hashable, Variable]: - return self._data.variables - - def _to_dataset(self) -> Dataset: - """Convert these coordinates into a new Dataset""" - from xarray.core.dataset import Dataset - - return Dataset._construct_direct( - coord_names=set(self._indexes), - dims=dict(self._indexes.dims), - variables=self._indexes._variables, - indexes=self._indexes._indexes, - ) - - def to_dataset(self) -> Dataset: - return self._data.copy() - - def merge_coords(self, other: Mapping[Any, Any] | None = None) -> Coordinates: - from xarray.core.dataset import Dataset - - if not isinstance(other, Coordinates): - other = Dataset(coords=other).coords - - return self.merge(other).coords - - def __getitem__(self, key: Hashable) -> DataArray: - return self._data[key] - - def update(self, other: Mapping[Any, Any]) -> None: - raise TypeError( - "IndexedCoordinates is 
immutable and can not be modified inplace" - ) - - def __delitem__(self, key: Hashable) -> None: - raise TypeError( - "IndexedCoordinates is immutable and can not be modified inplace" - ) - - def _ipython_key_completions_(self): - """Provide method for the key-autocompletions in IPython.""" - return self._data._ipython_key_completions_() From 3ee26ef5e39dc1610b322a8ecca4bc7554c07701 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 12:38:28 +0100 Subject: [PATCH 20/69] update api docs --- doc/api-hidden.rst | 42 +++++++++++++++++++++++++++++--------- doc/api.rst | 1 + xarray/core/coordinates.py | 2 +- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 04013d545c3..753ccfd1683 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -9,17 +9,36 @@ .. autosummary:: :toctree: generated/ + Coordinates.from_pandas_multiindex + Coordinates.get + Coordinates.items + Coordinates.keys + Coordinates.values + Coordinates.dims + Coordinates.dtypes + Coordinates.variables + Coordinates.xindexes + Coordinates.indexes + Coordinates.to_dataset + Coordinates.to_index + Coordinates.update + Coordinates.merge + Coordinates.merge_coords + core.coordinates.DatasetCoordinates.get core.coordinates.DatasetCoordinates.items core.coordinates.DatasetCoordinates.keys - core.coordinates.DatasetCoordinates.merge - core.coordinates.DatasetCoordinates.to_dataset - core.coordinates.DatasetCoordinates.to_index - core.coordinates.DatasetCoordinates.update core.coordinates.DatasetCoordinates.values core.coordinates.DatasetCoordinates.dims - core.coordinates.DatasetCoordinates.indexes + core.coordinates.DatasetCoordinates.dtypes core.coordinates.DatasetCoordinates.variables + core.coordinates.DatasetCoordinates.xindexes + core.coordinates.DatasetCoordinates.indexes + core.coordinates.DatasetCoordinates.to_dataset + core.coordinates.DatasetCoordinates.to_index + core.coordinates.DatasetCoordinates.update + 
core.coordinates.DatasetCoordinates.merge + core.coordinates.DatasetCoordinates.merge_coords core.rolling.DatasetCoarsen.boundary core.rolling.DatasetCoarsen.coord_func @@ -47,14 +66,17 @@ core.coordinates.DataArrayCoordinates.get core.coordinates.DataArrayCoordinates.items core.coordinates.DataArrayCoordinates.keys - core.coordinates.DataArrayCoordinates.merge - core.coordinates.DataArrayCoordinates.to_dataset - core.coordinates.DataArrayCoordinates.to_index - core.coordinates.DataArrayCoordinates.update core.coordinates.DataArrayCoordinates.values core.coordinates.DataArrayCoordinates.dims - core.coordinates.DataArrayCoordinates.indexes + core.coordinates.DataArrayCoordinates.dtypes core.coordinates.DataArrayCoordinates.variables + core.coordinates.DataArrayCoordinates.xindexes + core.coordinates.DataArrayCoordinates.indexes + core.coordinates.DataArrayCoordinates.to_dataset + core.coordinates.DataArrayCoordinates.to_index + core.coordinates.DataArrayCoordinates.update + core.coordinates.DataArrayCoordinates.merge + core.coordinates.DataArrayCoordinates.merge_coords core.rolling.DataArrayCoarsen.boundary core.rolling.DataArrayCoarsen.coord_func diff --git a/doc/api.rst b/doc/api.rst index 0d56fc73997..3e052c6b51a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1083,6 +1083,7 @@ Advanced API .. autosummary:: :toctree: generated/ + Coordinates Dataset.variables DataArray.variable Variable diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 10f7b4f51db..92fff48e39d 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -205,7 +205,7 @@ class Coordinates(AbstractCoordinates): Mapping of coordinate names to any object that can be converted into a :py:class:`Variable`. indexes: dict-like - Mapping of coordinate names to :py:class:`Index` objects. + Mapping of coordinate names to :py:class:`~indexes.Index` objects. 
""" From dd02ecac323df503efc67a54203053ff08d4f44d Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 13:17:07 +0100 Subject: [PATCH 21/69] make Coordinates init args optional --- xarray/core/coordinates.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 92fff48e39d..dd3bd4cff11 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -213,22 +213,31 @@ class Coordinates(AbstractCoordinates): __slots__ = ("_data",) - def __init__(self, coords: Mapping[Any, Any], indexes: Mapping[Any, Index]): + def __init__( + self, + coords: Mapping[Any, Any] | None = None, + indexes: Mapping[Any, Index] | None = None, + ): from xarray.core.dataset import Dataset - if isinstance(coords, Coordinates): + if coords is None: + variables = {} + elif isinstance(coords, Coordinates): variables = dict(coords.variables) else: variables = {k: as_variable(v) for k, v in coords.items()} - indexes = {} + if indexes is None: + indexes = {} + else: + indexes = dict(indexes) + for k, idx in indexes.items(): if not isinstance(idx, Index): raise TypeError(f"'{k}' is not an Xarray Index") - indexes[k] = idx self._data = Dataset._construct_direct( - coord_names=set(coords), variables=variables, indexes=indexes + coord_names=set(variables), variables=variables, indexes=indexes ) @classmethod From 0ee8f95658ceb8f6108440ee9d71bb70f0aa7fb9 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 13:35:43 +0100 Subject: [PATCH 22/69] docstrings updates --- xarray/core/coordinates.py | 29 ++++++++++++++++++++--------- xarray/core/dataarray.py | 18 +++++++++++++----- xarray/core/dataset.py | 21 ++++++++++++--------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index dd3bd4cff11..438f9ed9ba6 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -189,20 +189,27 @@ def 
update(self, other: Mapping[Any, Any]) -> None: class Coordinates(AbstractCoordinates): """Dictionary like container for Xarray coordinates (variables + indexes). - This collection can be passed directly to the :py:class:`~xarray.Dataset` - and :py:class:`~xarray.DataArray` constructors via their `coords` argument. - This will add both the coordinates variables and their index. + This collection is a mapping of coordinate names to + :py:class:`~xarray.DataArray` objects. + + It can be passed directly to the :py:class:`~xarray.Dataset` and + :py:class:`~xarray.DataArray` constructors via their `coords` argument. This + will add both the coordinates variables and their index. + + Coordinates are either: + + - returned via the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords` + properties. + - built from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`). + - built directly from coordinate data and index objects (beware that no consistency + check is done on those inputs). - Most often coordinates are returned via the :py:attr:`Dataset.coords` and - :py:attr:`DataArray.coords` properties. In occasional cases they are built - from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`). In - rare cases they are built directly from coordinate data and index objects - (beware that no consistency check is done on those inputs). + In the latter case, no default (pandas) index is created. Parameters ---------- coords: dict-like - Mapping of coordinate names to any object that can be converted + Mapping of coordinate names to any objects that can be converted into a :py:class:`Variable`. indexes: dict-like Mapping of coordinate names to :py:class:`~indexes.Index` objects. 
@@ -218,6 +225,10 @@ def __init__( coords: Mapping[Any, Any] | None = None, indexes: Mapping[Any, Index] | None = None, ): + # When coordinates are constructed directly, an internal Dataset is + # created so that it is compatible with the DatasetCoordinates and + # DataArrayCoordinates classes serving as a proxy for the data. + # TODO: refactor DataArray / Dataset so that Coordinates store the data. from xarray.core.dataset import Dataset if coords is None: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3f433c2fb8c..79187fcf0fe 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -315,9 +315,9 @@ class DataArray( attribute dictionary is initialized. indexes : py:class:`~xarray.Indexes` or dict-like, optional For internal use only. For passing indexes objects to the - new DataArray, use the ``coords`` argument instead with an - Xarray ``Coordinate`` object (both coordinate variables and - indexes will be extracted from the latter). + new DataArray, use the ``coords`` argument instead with a + :py:class:`~xarray.Coordinate` object (both coordinate variables + and indexes will be extracted from the latter). Examples -------- @@ -925,12 +925,20 @@ def indexes(self) -> Indexes: @property def xindexes(self) -> Indexes: - """Mapping of xarray Index objects used for label based indexing.""" + """Mapping of :py:class:`~xarray.indexes.Index` objects + used for label based indexing. + """ return Indexes(self._indexes, {k: self._coords[k] for k in self._indexes}) @property def coords(self) -> DataArrayCoordinates: - """Dictionary-like container of coordinate arrays.""" + """Mapping of :py:class:`~xarray.DataArray` objects corresponding to + coordinate variables. 
+ + See Also + -------- + Coordinates + """ return DataArrayCoordinates(self) @overload diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fac7eba4f72..42905c98b2a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -454,12 +454,9 @@ class Dataset( By default, pandas indexes are created for one dimensional variables with name equal to their dimension so those variables can be used as coordinates - for label based indexing. When an Xarray ``Coordinates`` object is passed to - ``coords``, any existing index(es) built from those coordinates will be - added to the Dataset (such ``Coordinates`` objects are returned by the - :py:attr:`~xarray.Dataset.coords` and :py:attr:`~xarray.DataArray.coords` - properties or may be created directly, e.g., with - :py:meth:`~xarray.IndexedCoordinates.from_pandas_multiindex`). + for label based indexing. When a :py:class:`~xarray.Coordinates` object is + passed to ``coords``, any existing index(es) built from those coordinates + will be added to the Dataset. To load data from a file or file-like object, use the `open_dataset` function. @@ -1679,13 +1676,19 @@ def indexes(self) -> Indexes[pd.Index]: @property def xindexes(self) -> Indexes[Index]: - """Mapping of xarray Index objects used for label based indexing.""" + """Mapping of :py:class:`~xarray.indexes.Index` objects + used for label based indexing. + """ return Indexes(self._indexes, {k: self._variables[k] for k in self._indexes}) @property def coords(self) -> DatasetCoordinates: - """Dictionary of xarray.DataArray objects corresponding to coordinate - variables + """Mapping of :py:class:`~xarray.DataArray` objects corresponding to + coordinate variables. 
+ + See Also + -------- + Coordinates """ return DatasetCoordinates(self) From fc6c948486e432e68df2b34e5ae9d99f62b9a4f1 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:04:48 +0100 Subject: [PATCH 23/69] convert to base variable when no index is given --- xarray/core/coordinates.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 438f9ed9ba6..83a611e6232 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -247,6 +247,11 @@ def __init__( if not isinstance(idx, Index): raise TypeError(f"'{k}' is not an Xarray Index") + # maybe convert to base variable + for k, v in variables.items(): + if k not in indexes: + variables[k] = v.to_base_variable() + self._data = Dataset._construct_direct( coord_names=set(variables), variables=variables, indexes=indexes ) From 0572b9679ad2e325943205b82b83015314d147fa Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:05:23 +0100 Subject: [PATCH 24/69] raise when an index is given with no variable --- xarray/core/coordinates.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 83a611e6232..194ea0b17c6 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -243,6 +243,12 @@ def __init__( else: indexes = dict(indexes) + no_coord_index = set(indexes) - set(variables) + if no_coord_index: + raise ValueError( + f"no coordinate variables found for these indexes: {no_coord_index}" + ) + for k, idx in indexes.items(): if not isinstance(idx, Index): raise TypeError(f"'{k}' is not an Xarray Index") From 6f5114bea30ae3d24e797c23383657a44257e396 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:06:22 +0100 Subject: [PATCH 25/69] skip create default indexes... ... 
When a Coordinates object is given to the Dataset constructor --- xarray/core/merge.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 4181f985666..8bda33cea3f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -320,6 +320,7 @@ def merge_collected( def collect_variables_and_indexes( list_of_mappings: list[DatasetLike], indexes: Mapping[Any, Any] | None = None, + create_default_indexes: bool = True, ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. @@ -366,7 +367,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if name in indexes: append(name, variable, indexes[name]) - elif variable.dims == (name,): + elif variable.dims == (name,) and create_default_indexes: idx, idx_vars = create_default_index_implicit(variable) append_all(idx_vars, {k: idx for k in idx_vars}) else: @@ -592,6 +593,7 @@ def merge_data_and_coords( join, explicit_coords=explicit_coords, indexes=Indexes(indexes, indexed_coords), + create_default_indexes=False, ) @@ -718,6 +720,7 @@ def merge_core( explicit_coords: Sequence | None = None, indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, + create_default_indexes: bool = True, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -743,6 +746,8 @@ def merge_core( may be cast to pandas.Index objects. fill_value : scalar, optional Value to use for newly missing values + create_default_indexes : bool, optional + If True, create default (pandas) indexes for dimension coordinates. 
Returns ------- @@ -768,7 +773,9 @@ def merge_core( aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) - collected = collect_variables_and_indexes(aligned, indexes=indexes) + collected = collect_variables_and_indexes( + aligned, indexes=indexes, create_default_indexes=create_default_indexes + ) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( collected, prioritized, compat=compat, combine_attrs=combine_attrs From e27830ab66b5a995603880e105970c0179596ce0 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:07:41 +0100 Subject: [PATCH 26/69] invariant checks: maybe skip IndexVariable checks ... when check_default_indexes is False. --- xarray/testing.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/xarray/testing.py b/xarray/testing.py index 481a23340fd..bd2a7f6a698 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -363,14 +363,16 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): assert all( ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes ), (ds._dims, {k: v.sizes for k, v in ds._variables.items()}) - assert all( - isinstance(v, IndexVariable) - for (k, v) in ds._variables.items() - if v.dims == (k,) - ), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)} - assert all(v.dims == (k,) for (k, v) in ds._variables.items() if k in ds._dims), { - k: v.dims for k, v in ds._variables.items() if k in ds._dims - } + + if check_default_indexes: + assert all( + isinstance(v, IndexVariable) + for (k, v) in ds._variables.items() + if v.dims == (k,) + ), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)} + assert all( + v.dims == (k,) for (k, v) in ds._variables.items() if k in ds._dims + ), {k: v.dims for k, v in ds._variables.items() if k in ds._dims} if ds._indexes is not None: _assert_indexes_invariants_checks( From 
1649fb8027dafea0242a9678a768b96d9c75410c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:08:47 +0100 Subject: [PATCH 27/69] add Coordinates tests --- xarray/tests/test_coordinates.py | 93 ++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 xarray/tests/test_coordinates.py diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py new file mode 100644 index 00000000000..01072e57fb2 --- /dev/null +++ b/xarray/tests/test_coordinates.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +import pandas as pd +import pytest + +from xarray.core.coordinates import Coordinates +from xarray.core.dataarray import DataArray +from xarray.core.dataset import Dataset +from xarray.core.indexes import PandasIndex, PandasMultiIndex +from xarray.tests import assert_identical + + +class TestCoordinates: + @pytest.fixture + def coords(self) -> Coordinates: + ds = Dataset(coords={"x": [0, 1, 2]}) + return Coordinates(coords=ds.coords, indexes=ds.xindexes) + + def test_init_noindex(self) -> None: + coords = Coordinates(coords={"foo": ("x", [0, 1, 2])}) + expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) + assert_identical(coords.to_dataset(), expected) + + def test_init_from_coords(self) -> None: + expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) + coords = Coordinates(coords=expected.coords) + assert_identical(coords.to_dataset(), expected) + + # default index + expected = Dataset(coords={"x": ("x", [0, 1, 2])}) + coords = Coordinates(coords=expected.coords, indexes=expected.xindexes) + assert_identical(coords.to_dataset(), expected) + + def test_init_empty(self) -> None: + coords = Coordinates() + assert len(coords) == 0 + + def test_init_index_error(self) -> None: + idx = PandasIndex([1, 2, 3], "x") + with pytest.raises(ValueError, match="no coordinate variables found"): + Coordinates(indexes={"x": idx}) + + with pytest.raises(TypeError, match=".* is not an Xarray Index"): + Coordinates(coords={"x": 
("x", [1, 2, 3])}, indexes={"x": "not_an_xarray_index"}) # type: ignore + + def test_from_pandas_multiindex(self) -> None: + midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) + coords = Coordinates.from_pandas_multiindex(midx, "x") + + assert coords.xindexes["x"].index.equals(midx) + assert coords.xindexes["x"].dim == "x" + + expected = PandasMultiIndex(midx, "x").create_variables() + assert list(coords.variables) == list(expected) + for name in ("x", "one", "two"): + assert_identical(expected[name], coords.variables[name]) + + def test_dims(self, coords): + assert coords.dims == {"x": 3} + + def test_dtypes(self, coords): + assert coords.dtypes == {"x": int} + + def test_getitem(self, coords): + assert_identical( + coords["x"], + DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"), + ) + + def test_delitem(self, coords): + del coords["x"] + assert "x" not in coords + + def test_update(self, coords): + coords.update({"y": ("y", [4, 5, 6])}) + assert "y" in coords + assert "y" in coords.xindexes + expected = DataArray([4, 5, 6], coords={"y": [4, 5, 6]}, name="y") + assert_identical(coords["y"], expected) + + def test_merge_coords(self, coords): + other = {"y": ("y", [4, 5, 6])} + actual = coords.merge_coords(other) + expected = coords.merge(other).coords + assert_identical(actual.to_dataset(), expected.to_dataset()) + + other = Coordinates(other) + actual = coords.merge_coords(other) + expected = coords.merge(other).coords + assert_identical( + actual.to_dataset(), expected.to_dataset(), check_default_indexes=False + ) + assert "y" not in actual.xindexes From 298fccd9ec68341a60558ca8e747fb397798167d Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:17:59 +0100 Subject: [PATCH 28/69] more Coordinates tests --- xarray/tests/test_coordinates.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 01072e57fb2..ee588d50fa1 100644 --- 
a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -43,6 +43,10 @@ def test_init_index_error(self) -> None: with pytest.raises(TypeError, match=".* is not an Xarray Index"): Coordinates(coords={"x": ("x", [1, 2, 3])}, indexes={"x": "not_an_xarray_index"}) # type: ignore + def test_init_dim_sizes_conflict(self) -> None: + with pytest.raises(ValueError): + Coordinates(coords={"foo": ("x", [1, 2]), "bar": ("x", [1, 2, 3, 4])}) + def test_from_pandas_multiindex(self) -> None: midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) coords = Coordinates.from_pandas_multiindex(midx, "x") From e8c627c5d587d001a839d87218fb5d506df66b27 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:18:39 +0100 Subject: [PATCH 29/69] add Dataset constructor tests with Coordinates --- xarray/tests/test_dataset.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 0d3be9d378b..6dcf9d08275 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -30,7 +30,7 @@ from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like -from xarray.core.coordinates import DatasetCoordinates +from xarray.core.coordinates import Coordinates, DatasetCoordinates from xarray.core.indexes import Index, PandasIndex from xarray.core.pycompat import array_type, integer_types from xarray.core.utils import is_scalar @@ -577,6 +577,29 @@ def test_constructor_with_coords(self) -> None: Dataset({}, {"x": mindex, "y": mindex}) Dataset({}, {"x": mindex, "level_1": range(4)}) + def test_constructor_no_default_index(self) -> None: + # explicitly passing a Coordinates object skips the creation of default index + ds = Dataset(coords=Coordinates({"x": ("x", [1, 2, 3])})) + assert "x" in ds + assert "x" not in ds.xindexes + + def 
test_constructor_multiindex(self) -> None: + midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) + coords = Coordinates.from_pandas_multiindex(midx, "x") + + ds = Dataset(coords=coords) + assert_identical(ds, coords.to_dataset()) + + def test_constructor_custom_index(self) -> None: + class CustomIndex(Index): + ... + + coords = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + ds = Dataset(coords=coords) + assert isinstance(ds.xindexes["x"], CustomIndex) + def test_properties(self) -> None: ds = create_test_data() @@ -6072,6 +6095,13 @@ def test_ipython_key_completion(self) -> None: ds["var3"].coords[item] # should not raise assert sorted(actual) == sorted(expected) + coords = Coordinates(ds.coords) + actual = coords._ipython_key_completions_() + expected = ["time", "dim2", "dim3", "numbers"] + for item in actual: + coords[item] # should not raise + assert sorted(actual) == sorted(expected) + # data_vars actual = ds.data_vars._ipython_key_completions_() expected = ["var1", "var2", "var3", "dim1"] From be86f878c502fd0169214e7e9cceb51b67d11402 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:44:16 +0100 Subject: [PATCH 30/69] fix mypy --- xarray/tests/test_coordinates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index ee588d50fa1..27bb23ebdb8 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -51,6 +51,7 @@ def test_from_pandas_multiindex(self) -> None: midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) coords = Coordinates.from_pandas_multiindex(midx, "x") + assert isinstance(coords.xindexes["x"], PandasMultiIndex) assert coords.xindexes["x"].index.equals(midx) assert coords.xindexes["x"].dim == "x" From 75e25237dcf5bf065eaffaa19ae9d40e811d6535 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 16:48:42 +0100 Subject: [PATCH 31/69] 
assign_coords: do not create default indexes... ... when passing a Coordinates object --- xarray/core/common.py | 10 +++++++++- xarray/core/coordinates.py | 38 +++++++++++++++++++++++++++----------- xarray/core/merge.py | 5 ++++- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 783847cd60d..87e4f5bfb92 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -609,9 +609,17 @@ def assign_coords( Dataset.swap_dims Dataset.set_coords """ + from xarray.core.coordinates import Coordinates + coords_combined = either_dict_or_kwargs(coords, coords_kwargs, "assign_coords") data = self.copy(deep=False) - results: dict[Hashable, Any] = self._calc_assign_results(coords_combined) + + results: Coordinates | dict[Hashable, Any] + if isinstance(coords, Coordinates): + results = coords + else: + results = self._calc_assign_results(coords_combined) + data.coords.update(results) return data diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 194ea0b17c6..5138f3dc2e2 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -51,9 +51,6 @@ class AbstractCoordinates(Mapping[Hashable, "T_DataArray"]): def __getitem__(self, key: Hashable) -> T_DataArray: raise NotImplementedError() - def __setitem__(self, key: Hashable, value: Any) -> None: - self.update({key: value}) - @property def _names(self) -> set[Hashable]: raise NotImplementedError() @@ -177,14 +174,6 @@ def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index: return pd.MultiIndex(level_list, code_list, names=names) - def update(self, other: Mapping[Any, Any]) -> None: - other_vars = getattr(other, "variables", other) - self._maybe_drop_multiindex_coords(set(other_vars)) - coords, indexes = merge_coords( - [self.variables, other_vars], priority_arg=1, indexes=self.xindexes - ) - self._update_coords(coords, indexes) - class Coordinates(AbstractCoordinates): """Dictionary like container for 
Xarray coordinates (variables + indexes). @@ -432,6 +421,33 @@ def merge_coords(self, other: Mapping[Any, Any] | None = None) -> Coordinates: return self.merge(other).coords + def __setitem__(self, key: Hashable, value: Any) -> None: + self.update({key: value}) + + def update(self, other: Mapping[Any, Any]) -> None: + other_obj: Dataset | Mapping[Hashable, Variable] + + if isinstance(other, Coordinates): + # special case: do not create default indexes + # converting to Dataset will allow reusing existing indexes + # when merging coordinates below + other_obj = other.to_dataset() + create_default_indexes = False + else: + other_obj = getattr(other, "variables", other) + create_default_indexes = True + + self._maybe_drop_multiindex_coords(set(other_obj)) + + coords, indexes = merge_coords( + [self.variables, other_obj], + priority_arg=1, + indexes=self.xindexes, + create_default_indexes=create_default_indexes, + ) + + self._update_coords(coords, indexes) + def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 8bda33cea3f..4fc6d9e03c3 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -551,6 +551,7 @@ def merge_coords( priority_arg: int | None = None, indexes: Mapping[Any, Index] | None = None, fill_value: object = dtypes.NA, + create_default_indexes: bool = True, ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge coordinate variables. 
@@ -563,7 +564,9 @@ def merge_coords( aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) - collected = collect_variables_and_indexes(aligned, indexes=indexes) + collected = collect_variables_and_indexes( + aligned, indexes=indexes, create_default_indexes=create_default_indexes + ) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected(collected, prioritized, compat=compat) return variables, out_indexes From 82f0fb2455a7ec6511b425bcc87bd9bdb5914880 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 20:42:55 +0100 Subject: [PATCH 32/69] support alignment of Coordinates --- xarray/core/alignment.py | 7 ++- xarray/core/coordinates.py | 89 ++++++++++++++++++++++++++++++---- xarray/core/dataarray.py | 18 ++++--- xarray/core/dataset.py | 27 +++++++++-- xarray/core/merge.py | 97 +++++--------------------------------- 5 files changed, 130 insertions(+), 108 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 1f00eecfdbe..b4d2b18bf2c 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -526,7 +526,7 @@ def _reindex_one( new_indexes, new_variables = self._get_indexes_and_vars(obj, matching_indexes) dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes) - new_obj = obj._reindex_callback( + return obj._reindex_callback( self, dim_pos_indexers, new_variables, @@ -535,8 +535,6 @@ def _reindex_one( self.exclude_dims, self.exclude_vars, ) - new_obj.encoding = obj.encoding - return new_obj def reindex_all(self) -> None: self.results = tuple( @@ -786,6 +784,7 @@ def deep_align( This function is not public API. 
""" + from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -793,7 +792,7 @@ def deep_align( indexes = {} def is_alignable(obj): - return isinstance(obj, (DataArray, Dataset)) + return isinstance(obj, (Coordinates, DataArray, Dataset)) positions = [] keys = [] diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 5138f3dc2e2..f67f5d91b84 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -17,11 +17,13 @@ import pandas as pd from xarray.core import formatting +from xarray.core.alignment import Aligner from xarray.core.indexes import ( Index, Indexes, PandasMultiIndex, assert_no_index_corrupted, + create_default_index_implicit, ) from xarray.core.merge import merge_coordinates_without_align, merge_coords from xarray.core.types import T_DataArray @@ -287,6 +289,10 @@ def _names(self) -> set[Hashable]: def dims(self) -> Mapping[Hashable, int] | tuple[Hashable, ...]: return self._data.dims + @property + def sizes(self) -> Frozen[Hashable, int]: + return self._data.sizes + @property def dtypes(self) -> Frozen[Hashable, np.dtype]: """Mapping from coordinate names to dtypes. 
@@ -425,17 +431,13 @@ def __setitem__(self, key: Hashable, value: Any) -> None: self.update({key: value}) def update(self, other: Mapping[Any, Any]) -> None: - other_obj: Dataset | Mapping[Hashable, Variable] + other_obj: Coordinates | Mapping[Hashable, Variable] if isinstance(other, Coordinates): - # special case: do not create default indexes - # converting to Dataset will allow reusing existing indexes - # when merging coordinates below - other_obj = other.to_dataset() - create_default_indexes = False + # special case: default indexes won't be created + other_obj = other else: other_obj = getattr(other, "variables", other) - create_default_indexes = True self._maybe_drop_multiindex_coords(set(other_obj)) @@ -443,15 +445,52 @@ def update(self, other: Mapping[Any, Any]) -> None: [self.variables, other_obj], priority_arg=1, indexes=self.xindexes, - create_default_indexes=create_default_indexes, ) self._update_coords(coords, indexes) + def _overwrite_indexes( + self, + indexes: Mapping[Any, Index], + coords: Mapping[Any, Variable] | None = None, + drop_coords: list[Hashable] | None = None, + rename_dims: Mapping[Any, Any] | None = None, + ) -> Coordinates: + results = self._data._overwrite_indexes( + indexes, coords, drop_coords, rename_dims + ) + return results.coords + + def _reindex_callback( + self, + aligner: Aligner, + dim_pos_indexers: dict[Hashable, Any], + variables: dict[Hashable, Variable], + indexes: dict[Hashable, Index], + fill_value: Any, + exclude_dims: frozenset[Hashable], + exclude_vars: frozenset[Hashable], + ) -> Coordinates: + """Callback called from ``Aligner`` to create a new reindexed Coordinates.""" + aligned = self._data._reindex_callback( + aligner, + dim_pos_indexers, + variables, + indexes, + fill_value, + exclude_dims, + exclude_vars, + ) + return aligned.coords + def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() + def copy(self, deep=False): + 
# TODO: improve implementation + return self.to_dataset().coords + class DatasetCoordinates(Coordinates): """Dictionary like container for Dataset coordinates (variables + indexes). @@ -694,3 +733,37 @@ def assert_coordinate_consistent( f"dimension coordinate {k!r} conflicts between " f"indexed and indexing objects:\n{obj[k]}\nvs.\n{coords[k]}" ) + + +def create_coords_with_default_indexes( + coords: Mapping[Any, Variable], data_vars: Mapping[Any, Variable] | None = None +) -> Coordinates: + """Maybe create default indexes from a mapping of coordinates.""" + all_variables = dict(coords) + if data_vars is not None: + all_variables.update(data_vars) + + indexes = {} + updated_coords = {} + + # this is needed for backward compatibility: when a pandas multi-index + # is given as data variable, it is promoted as index / level coordinates + # TODO: depreciate this implicit behavior + index_vars = { + k: v + for k, v in all_variables.items() + if k in coords or isinstance(v, pd.MultiIndex) + } + + for name, obj in index_vars.items(): + variable = as_variable(obj, name=name) + + if variable.dims == (name,): + idx, idx_vars = create_default_index_implicit(variable, all_variables) + indexes.update({k: idx for k in idx_vars}) + updated_coords.update(idx_vars) + all_variables.update(idx_vars) + else: + updated_coords[name] = obj + + return Coordinates(coords=updated_coords, indexes=indexes) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 79187fcf0fe..86fdf6af3d8 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -38,6 +38,7 @@ Coordinates, DataArrayCoordinates, assert_coordinate_consistent, + create_coords_with_default_indexes, ) from xarray.core.dataset import Dataset from xarray.core.formatting import format_item @@ -49,7 +50,7 @@ isel_indexes, ) from xarray.core.indexing import is_fancy_indexer, map_index_queries -from xarray.core.merge import PANDAS_TYPES, MergeError, _create_indexes_from_coords +from xarray.core.merge import 
PANDAS_TYPES, MergeError from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.utils import ( Default, @@ -446,11 +447,10 @@ def __init__( coords, dims = _infer_coords_and_dims(data.shape, coords, dims) variable = Variable(dims, data, attrs, fastpath=True) - if isinstance(coords, Coordinates): - indexes = dict(coords.xindexes) - coords = {k: v.copy() for k, v in coords.variables.items()} - else: - indexes, coords = _create_indexes_from_coords(coords) + if not isinstance(coords, Coordinates): + coords = create_coords_with_default_indexes(coords) + indexes = dict(coords.xindexes) + coords = {k: v.copy() for k, v in coords.variables.items()} # These fully describe a DataArray self._variable = variable @@ -1817,7 +1817,11 @@ def _reindex_callback( exclude_dims, exclude_vars, ) - return self._from_temp_dataset(reindexed) + + da = self._from_temp_dataset(reindexed) + da.encoding = self.encoding + + return da def reindex_like( self: T_DataArray, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 42905c98b2a..82899b0ca93 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -51,7 +51,12 @@ get_chunksizes, ) from xarray.core.computation import unify_chunks -from xarray.core.coordinates import DatasetCoordinates, assert_coordinate_consistent +from xarray.core.coordinates import ( + Coordinates, + DatasetCoordinates, + assert_coordinate_consistent, + create_coords_with_default_indexes, +) from xarray.core.duck_array_ops import datetime_to_numeric from xarray.core.indexes import ( Index, @@ -70,7 +75,7 @@ dataset_merge_method, dataset_update_method, merge_coordinates_without_align, - merge_data_and_coords, + merge_core, ) from xarray.core.missing import get_clean_interp_index from xarray.core.options import OPTIONS, _get_keep_attrs @@ -104,7 +109,6 @@ from xarray.backends import AbstractDataStore, ZarrStore from xarray.backends.api import T_NetcdfEngine, T_NetcdfTypes - from xarray.core.coordinates import Coordinates from 
xarray.core.dataarray import DataArray from xarray.core.groupby import DatasetGroupBy from xarray.core.merge import CoercibleMapping @@ -361,6 +365,19 @@ def _initialize_feasible(lb, ub): return param_defaults, bounds_defaults +def merge_data_and_coords(data_vars, coords): + """Used in Dataset.__init__.""" + if not isinstance(coords, Coordinates): + coords = create_coords_with_default_indexes(coords, data_vars) + + return merge_core( + [data_vars, coords], + compat="broadcast_equals", + join="outer", + explicit_coords=tuple(coords), + ) + + class DataVariables(Mapping[Any, "DataArray"]): __slots__ = ("_dataset",) @@ -613,7 +630,7 @@ def __init__( coords = coords.variables variables, coord_names, dims, indexes, _ = merge_data_and_coords( - data_vars, coords, compat="broadcast_equals" + data_vars, coords ) self._attrs = dict(attrs) if attrs is not None else None @@ -2860,6 +2877,8 @@ def _reindex_callback( new_variables, new_coord_names, indexes=new_indexes ) + reindexed.encoding = self.encoding + return reindexed def reindex_like( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 4fc6d9e03c3..fc7e5ed9e82 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -22,8 +22,6 @@ from xarray.core.duck_array_ops import lazy_array_equiv from xarray.core.indexes import ( Index, - Indexes, - PandasIndex, create_default_index_implicit, filter_indexes_from_coords, indexes_equal, @@ -46,7 +44,7 @@ Tuple[DimsLike, ArrayLike, Mapping, Mapping], ] XarrayValue = Union[DataArray, Variable, VariableLike] - DatasetLike = Union[Dataset, Mapping[Any, XarrayValue]] + DatasetLike = Union[Dataset, Coordinates, Mapping[Any, XarrayValue]] CoercibleValue = Union[XarrayValue, pd.Series, pd.DataFrame] CoercibleMapping = Union[Dataset, Mapping[Any, CoercibleValue]] @@ -320,12 +318,12 @@ def merge_collected( def collect_variables_and_indexes( list_of_mappings: list[DatasetLike], indexes: Mapping[Any, Any] | None = None, - create_default_indexes: bool = True, + 
create_coords_with_default_indexes: bool = True, ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. - Mappings must either be Dataset objects, or have values of one of the - following types: + Mappings must either be Dataset or Coordinates objects, + or have values of one of the following types: - an xarray.Variable - a tuple `(dims, data[, attrs[, encoding]])` that can be converted in an xarray.Variable @@ -335,6 +333,7 @@ def collect_variables_and_indexes( with a matching key/name. """ + from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -351,8 +350,8 @@ def append_all(variables, indexes): append(name, variable, indexes.get(name)) for mapping in list_of_mappings: - if isinstance(mapping, Dataset): - append_all(mapping.variables, mapping._indexes) + if isinstance(mapping, (Coordinates, Dataset)): + append_all(mapping.variables, mapping.xindexes) continue for name, variable in mapping.items(): @@ -367,7 +366,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if name in indexes: append(name, variable, indexes[name]) - elif variable.dims == (name,) and create_default_indexes: + elif variable.dims == (name,) and create_coords_with_default_indexes: idx, idx_vars = create_default_index_implicit(variable) append_all(idx_vars, {k: idx for k in idx_vars}) else: @@ -479,12 +478,13 @@ def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLik List of Dataset or dictionary objects. Any inputs or values in the inputs that were pandas objects have been converted into native xarray objects. 
""" + from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset out = [] for obj in objects: - if isinstance(obj, Dataset): + if isinstance(obj, (Dataset, Coordinates)): variables: DatasetLike = obj else: variables = {} @@ -551,7 +551,6 @@ def merge_coords( priority_arg: int | None = None, indexes: Mapping[Any, Index] | None = None, fill_value: object = dtypes.NA, - create_default_indexes: bool = True, ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge coordinate variables. @@ -564,79 +563,12 @@ def merge_coords( aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) - collected = collect_variables_and_indexes( - aligned, indexes=indexes, create_default_indexes=create_default_indexes - ) + collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected(collected, prioritized, compat=compat) return variables, out_indexes -def merge_data_and_coords( - data_vars, - coords, - compat="broadcast_equals", - join="outer", -): - """Used in Dataset.__init__.""" - from xarray.core.coordinates import Coordinates - - if isinstance(coords, Coordinates): - indexes = coords.xindexes - coords = coords.variables - else: - indexes, coords = _create_indexes_from_coords(coords, data_vars) - - objects = [data_vars, coords] - explicit_coords = coords.keys() - indexed_coords = {k: v for k, v in coords.items() if k in indexes} - return merge_core( - objects, - compat, - join, - explicit_coords=explicit_coords, - indexes=Indexes(indexes, indexed_coords), - create_default_indexes=False, - ) - - -def _create_indexes_from_coords( - coords: Mapping[Any, Variable], data_vars: Mapping[Any, Variable] | None = None -) -> tuple[dict[Any, PandasIndex], dict[Any, Variable]]: - """Maybe create default indexes from a mapping of coordinates. 
- - Return those indexes and updated coordinates. - """ - all_variables = dict(coords) - if data_vars is not None: - all_variables.update(data_vars) - - indexes = {} - updated_coords = {} - - # this is needed for backward compatibility: when a pandas multi-index - # is given as data variable, it is promoted as index / level coordinates - # TODO: depreciate this implicit behavior - index_vars = { - k: v - for k, v in all_variables.items() - if k in coords or isinstance(v, pd.MultiIndex) - } - - for name, obj in index_vars.items(): - variable = as_variable(obj, name=name) - - if variable.dims == (name,): - idx, idx_vars = create_default_index_implicit(variable, all_variables) - indexes.update({k: idx for k in idx_vars}) - updated_coords.update(idx_vars) - all_variables.update(idx_vars) - else: - updated_coords[name] = obj - - return indexes, updated_coords - - def assert_valid_explicit_coords(variables, dims, explicit_coords): """Validate explicit coordinate names/dims. @@ -723,7 +655,6 @@ def merge_core( explicit_coords: Sequence | None = None, indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, - create_default_indexes: bool = True, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -749,8 +680,6 @@ def merge_core( may be cast to pandas.Index objects. fill_value : scalar, optional Value to use for newly missing values - create_default_indexes : bool, optional - If True, create default (pandas) indexes for dimension coordinates. 
Returns ------- @@ -776,9 +705,7 @@ def merge_core( aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) - collected = collect_variables_and_indexes( - aligned, indexes=indexes, create_default_indexes=create_default_indexes - ) + collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( collected, prioritized, compat=compat, combine_attrs=combine_attrs From 883e67cf97fb34a7b43fdc48d9bf131801f7670a Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 21:48:33 +0100 Subject: [PATCH 33/69] clean-up --- xarray/core/merge.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index fc7e5ed9e82..f84fc636710 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -318,19 +318,22 @@ def merge_collected( def collect_variables_and_indexes( list_of_mappings: list[DatasetLike], indexes: Mapping[Any, Any] | None = None, - create_coords_with_default_indexes: bool = True, ) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. - Mappings must either be Dataset or Coordinates objects, - or have values of one of the following types: + Mappings can be Dataset or Coordinates objects, in which case both + variables and indexes are extracted from it. + + It can also have values of one of the following types: - an xarray.Variable - a tuple `(dims, data[, attrs[, encoding]])` that can be converted in an xarray.Variable - or an xarray.DataArray If a mapping of indexes is given, those indexes are assigned to all variables - with a matching key/name. + with a matching key/name. For dimension variables with no matching index, a + default (pandas) index is assigned. DataArray indexes that don't match mapping + keys are also extracted. 
""" from xarray.core.coordinates import Coordinates @@ -366,7 +369,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if name in indexes: append(name, variable, indexes[name]) - elif variable.dims == (name,) and create_coords_with_default_indexes: + elif variable.dims == (name,): idx, idx_vars = create_default_index_implicit(variable) append_all(idx_vars, {k: idx for k in idx_vars}) else: From 28e98613306adee0299d008bef68621f9523307b Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 21:48:44 +0100 Subject: [PATCH 34/69] fix failing test (dataarray coords not extracted) --- xarray/core/coordinates.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index f67f5d91b84..224d5888d15 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -311,7 +311,8 @@ def variables(self) -> Mapping[Hashable, Variable]: def to_dataset(self) -> Dataset: """Convert these coordinates into a new Dataset""" - return self._data.copy() + names = [name for name in self._data._variables if name in self._names] + return self._data._copy_listed(names) def __getitem__(self, key: Hashable) -> DataArray: return self._data[key] @@ -736,9 +737,11 @@ def assert_coordinate_consistent( def create_coords_with_default_indexes( - coords: Mapping[Any, Variable], data_vars: Mapping[Any, Variable] | None = None + coords: Mapping[Any, Any], data_vars: Mapping[Any, Variable] | None = None ) -> Coordinates: """Maybe create default indexes from a mapping of coordinates.""" + from xarray.core.dataarray import DataArray + all_variables = dict(coords) if data_vars is not None: all_variables.update(data_vars) @@ -756,6 +759,15 @@ def create_coords_with_default_indexes( } for name, obj in index_vars.items(): + if isinstance(obj, DataArray): + # extract all coords/indexes from DataArray objects + # except if explicitly overwritten by DataArray data + for k, v 
in obj._coords.items(): + if k != name: + updated_coords[k] = v.copy() + if k in obj._indexes: + indexes[k] = obj._indexes[k] + variable = as_variable(obj, name=name) if variable.dims == (name,): From 9a209a3451d125ea9d07df05bc83fc1d1be6465c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 12 Dec 2022 23:00:19 +0100 Subject: [PATCH 35/69] fix tests: prevent index conflicts Do not extract multi-coordinate indexes from DataArray if they are overwritten or dropped (dimension coordinate). --- xarray/core/coordinates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 224d5888d15..2500989fada 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -766,7 +766,8 @@ def create_coords_with_default_indexes( if k != name: updated_coords[k] = v.copy() if k in obj._indexes: - indexes[k] = obj._indexes[k] + if name not in obj.xindexes.get_all_coords(k): + indexes[k] = obj._indexes[k] variable = as_variable(obj, name=name) From 4f337e3b4d0f5d999d57b381ed1204c12591a28b Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 15:15:13 +0100 Subject: [PATCH 36/69] add Coordinates.equals and Coordinates.identical --- xarray/core/coordinates.py | 23 +++++++++++++++++++++++ xarray/testing.py | 17 +++++++++++++---- xarray/tests/test_coordinates.py | 10 +++++++++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 2500989fada..8e31ac88939 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -321,6 +321,29 @@ def __delitem__(self, key: Hashable) -> None: # redirect to DatasetCoordinates.__delitem__ del self._data.coords[key] + def equals(self, other: Coordinates) -> bool: + """Two Coordinates objects are equal if they have matching variables, + all of which are equal. 
+ + See Also + -------- + Coordinates.identical + """ + if not isinstance(other, Coordinates): + return False + return self.to_dataset().equals(other.to_dataset()) + + def identical(self, other: Coordinates) -> bool: + """Like equals, but also checks all variable attributes. + + See Also + -------- + Coordinates.equals + """ + if not isinstance(other, Coordinates): + return False + return self.to_dataset().identical(other.to_dataset()) + def _update_coords( self, coords: dict[Hashable, Variable], indexes: Mapping[Any, Index] ) -> None: diff --git a/xarray/testing.py b/xarray/testing.py index bd2a7f6a698..4da0fba9315 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -7,6 +7,7 @@ import pandas as pd from xarray.core import duck_array_ops, formatting, utils +from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes @@ -67,9 +68,9 @@ def assert_equal(a, b): Parameters ---------- - a : xarray.Dataset, xarray.DataArray or xarray.Variable + a : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates The first object to compare. - b : xarray.Dataset, xarray.DataArray or xarray.Variable + b : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates The second object to compare. 
See Also @@ -83,6 +84,8 @@ def assert_equal(a, b): assert a.equals(b), formatting.diff_array_repr(a, b, "equals") elif isinstance(a, Dataset): assert a.equals(b), formatting.diff_dataset_repr(a, b, "equals") + elif isinstance(a, Coordinates): + assert a.equals(b), formatting.diff_coords_repr(a, b, "equals") else: raise TypeError(f"{type(a)} not supported by assertion comparison") @@ -96,9 +99,9 @@ def assert_identical(a, b): Parameters ---------- - a : xarray.Dataset, xarray.DataArray or xarray.Variable + a : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates The first object to compare. - b : xarray.Dataset, xarray.DataArray or xarray.Variable + b : xarray.Dataset, xarray.DataArray, xarray.Variable or xarray.Coordinates The second object to compare. See Also @@ -114,6 +117,8 @@ def assert_identical(a, b): assert a.identical(b), formatting.diff_array_repr(a, b, "identical") elif isinstance(a, (Dataset, Variable)): assert a.identical(b), formatting.diff_dataset_repr(a, b, "identical") + elif isinstance(a, Coordinates): + assert a.identical(b), formatting.diff_coords_repr(a, b, "identical") else: raise TypeError(f"{type(a)} not supported by assertion comparison") @@ -402,6 +407,10 @@ def _assert_internal_invariants( _assert_dataset_invariants( xarray_obj, check_default_indexes=check_default_indexes ) + elif isinstance(xarray_obj, Coordinates): + _assert_dataset_invariants( + xarray_obj.to_dataset(), check_default_indexes=check_default_indexes + ) else: raise TypeError( "{} is not a supported type for xarray invariant checks".format( diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 27bb23ebdb8..490d461f173 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -83,7 +83,15 @@ def test_update(self, coords): expected = DataArray([4, 5, 6], coords={"y": [4, 5, 6]}, name="y") assert_identical(coords["y"], expected) - def test_merge_coords(self, coords): + def test_equals(self, 
coords): + assert coords.equals(coords) + assert not coords.equals("no_a_coords") + + def test_identical(self, coords): + assert coords.identical(coords) + assert not coords.identical("no_a_coords") + + def test_merge_coords(self, coords) -> None: other = {"y": ("y", [4, 5, 6])} actual = coords.merge_coords(other) expected = coords.merge(other).coords From 43ddcf602658e08cf15ef52dc3daa3dd3f2c586e Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 15:15:50 +0100 Subject: [PATCH 37/69] more tests, docstrings, docs --- doc/api-hidden.rst | 9 +++++++ xarray/core/coordinates.py | 28 ++++++++++++++++--- xarray/tests/test_coordinates.py | 46 +++++++++++++++++++++++++------- 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 753ccfd1683..d75695d0d0c 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -24,6 +24,9 @@ Coordinates.update Coordinates.merge Coordinates.merge_coords + Coordinates.copy + Coordinates.equals + Coordinates.identical core.coordinates.DatasetCoordinates.get core.coordinates.DatasetCoordinates.items @@ -39,6 +42,9 @@ core.coordinates.DatasetCoordinates.update core.coordinates.DatasetCoordinates.merge core.coordinates.DatasetCoordinates.merge_coords + core.coordinates.DataArrayCoordinates.copy + core.coordinates.DatasetCoordinates.equals + core.coordinates.DatasetCoordinates.identical core.rolling.DatasetCoarsen.boundary core.rolling.DatasetCoarsen.coord_func @@ -77,6 +83,9 @@ core.coordinates.DataArrayCoordinates.update core.coordinates.DataArrayCoordinates.merge core.coordinates.DataArrayCoordinates.merge_coords + core.coordinates.DataArrayCoordinates.copy + core.coordinates.DataArrayCoordinates.equals + core.coordinates.DataArrayCoordinates.identical core.rolling.DataArrayCoarsen.boundary core.rolling.DataArrayCoarsen.coord_func diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 8e31ac88939..cf8f4c6b709 100644 --- a/xarray/core/coordinates.py +++ 
b/xarray/core/coordinates.py @@ -67,10 +67,22 @@ def dtypes(self) -> Frozen[Hashable, np.dtype]: @property def indexes(self) -> Indexes[pd.Index]: + """Mapping of pandas.Index objects used for label based indexing. + + Raises an error if this Coordinates object has indexes that cannot + be coerced to pandas.Index objects. + + See Also + -------- + Coordinates.xindexes + """ return self._data.indexes @property def xindexes(self) -> Indexes[Index]: + """Mapping of :py:class:`~xarray.indexes.Index` objects + used for label based indexing. + """ return self._data.xindexes @property @@ -287,10 +299,12 @@ def _names(self) -> set[Hashable]: @property def dims(self) -> Mapping[Hashable, int] | tuple[Hashable, ...]: + """Mapping from dimension names to lengths or tuple of dimension names.""" return self._data.dims @property def sizes(self) -> Frozen[Hashable, int]: + """Mapping from dimension names to lengths.""" return self._data.sizes @property @@ -307,10 +321,14 @@ def dtypes(self) -> Frozen[Hashable, np.dtype]: @property def variables(self) -> Mapping[Hashable, Variable]: + """Low level interface to Coordinates contents as dict of Variable objects. + + This dictionary is frozen to prevent mutation. 
+ """ return self._data.variables def to_dataset(self) -> Dataset: - """Convert these coordinates into a new Dataset""" + """Convert these coordinates into a new Dataset.""" names = [name for name in self._data._variables if name in self._names] return self._data._copy_listed(names) @@ -455,6 +473,7 @@ def __setitem__(self, key: Hashable, value: Any) -> None: self.update({key: value}) def update(self, other: Mapping[Any, Any]) -> None: + """Update this Coordinates variables with other coordinate variables.""" other_obj: Coordinates | Mapping[Hashable, Variable] if isinstance(other, Coordinates): @@ -511,9 +530,10 @@ def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() - def copy(self, deep=False): - # TODO: improve implementation - return self.to_dataset().coords + def copy(self, deep: bool = False) -> Coordinates: + """Return a copy of this Coordinates object.""" + # TODO: improve implementation? 
+ return self.to_dataset().copy(deep=deep).coords class DatasetCoordinates(Coordinates): diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 490d461f173..64e69158002 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -3,11 +3,12 @@ import pandas as pd import pytest +from xarray.core.alignment import align from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.indexes import PandasIndex, PandasMultiIndex -from xarray.tests import assert_identical +from xarray.tests import assert_identical, source_ndarray class TestCoordinates: @@ -60,23 +61,26 @@ def test_from_pandas_multiindex(self) -> None: for name in ("x", "one", "two"): assert_identical(expected[name], coords.variables[name]) - def test_dims(self, coords): + def test_dims(self, coords) -> None: assert coords.dims == {"x": 3} - def test_dtypes(self, coords): + def test_sizes(self, coords) -> None: + assert coords.sizes == {"x": 3} + + def test_dtypes(self, coords) -> None: assert coords.dtypes == {"x": int} - def test_getitem(self, coords): + def test_getitem(self, coords) -> None: assert_identical( coords["x"], DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"), ) - def test_delitem(self, coords): + def test_delitem(self, coords) -> None: del coords["x"] assert "x" not in coords - def test_update(self, coords): + def test_update(self, coords) -> None: coords.update({"y": ("y", [4, 5, 6])}) assert "y" in coords assert "y" in coords.xindexes @@ -100,7 +104,31 @@ def test_merge_coords(self, coords) -> None: other = Coordinates(other) actual = coords.merge_coords(other) expected = coords.merge(other).coords - assert_identical( - actual.to_dataset(), expected.to_dataset(), check_default_indexes=False - ) + assert_identical(actual, expected, check_default_indexes=False) assert "y" not in actual.xindexes + + def test_copy(self, coords) -> None: + 
copied = coords.copy() + assert_identical(coords.to_dataset(), copied.to_dataset()) + v0 = coords.variables["x"] + v1 = copied.variables["x"] + assert v0 is not v1 + assert source_ndarray(v0.data) is source_ndarray(v1.data) + + # deep copy: use non-indexed coordinates + # (indexes are immutable so not deep-copied?) + no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) + deep_copied = no_index_coords.copy(deep=True) + assert_identical(no_index_coords.to_dataset(), deep_copied.to_dataset()) + v0 = no_index_coords.variables["foo"] + v1 = deep_copied.variables["foo"] + assert v0 is not v1 + assert source_ndarray(v0.data) is not source_ndarray(v1.data) + + def test_align(self, coords) -> None: + left = coords + right = coords.to_dataset().isel(x=[0, 1]).coords + + # test Coordinates._reindex_callback + left2, right2 = align(left, right, join="inner") + assert_identical(left2, right2) From 2437456ce05420c841d0993b96cfa66c830ff897 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 15:30:42 +0100 Subject: [PATCH 38/69] fix assert_* (Coordinates subclasses) --- xarray/testing.py | 10 ++++++++-- xarray/tests/test_coordinates.py | 8 +++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/xarray/testing.py b/xarray/testing.py index 4da0fba9315..7ca87471436 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -79,7 +79,10 @@ def assert_equal(a, b): numpy.testing.assert_array_equal """ __tracebackhide__ = True - assert type(a) == type(b) + try: + assert type(a) == type(b) + except AssertionError: + assert isinstance(a, Coordinates) and isinstance(b, Coordinates) if isinstance(a, (Variable, DataArray)): assert a.equals(b), formatting.diff_array_repr(a, b, "equals") elif isinstance(a, Dataset): @@ -109,7 +112,10 @@ def assert_identical(a, b): assert_equal, assert_allclose, Dataset.equals, DataArray.equals """ __tracebackhide__ = True - assert type(a) == type(b) + try: + assert type(a) == type(b) + except AssertionError: + assert 
isinstance(a, Coordinates) and isinstance(b, Coordinates) if isinstance(a, Variable): assert a.identical(b), formatting.diff_array_repr(a, b, "identical") elif isinstance(a, DataArray): diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 64e69158002..9eb5600883a 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -127,8 +127,14 @@ def test_copy(self, coords) -> None: def test_align(self, coords) -> None: left = coords - right = coords.to_dataset().isel(x=[0, 1]).coords # test Coordinates._reindex_callback + right = coords.to_dataset().isel(x=[0, 1]).coords left2, right2 = align(left, right, join="inner") assert_identical(left2, right2) + + # test Coordinates._overwrite_indexes + right.update({"x": ("x", [4, 5, 6])}) + left2, right2 = align(left, right, join="override") + assert_identical(left2, left) + assert_identical(left2, right2) From e60570f1c098c45f8cf6ac8f2fc653435abeeaf8 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 17:10:07 +0100 Subject: [PATCH 39/69] review copy --- xarray/core/coordinates.py | 13 +++++++++---- xarray/core/dataset.py | 4 +++- xarray/tests/test_coordinates.py | 17 +++++++++-------- xarray/tests/test_dataset.py | 3 +++ 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index cf8f4c6b709..241f4ef02ee 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -237,7 +237,7 @@ def __init__( if coords is None: variables = {} elif isinstance(coords, Coordinates): - variables = dict(coords.variables) + variables = {k: v.copy() for k, v in coords.variables.items()} else: variables = {k: as_variable(v) for k, v in coords.items()} @@ -530,10 +530,15 @@ def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() - def copy(self, deep: bool = False) -> Coordinates: + def copy( + self, deep: 
bool = False, memo: dict[int, Any] | None = None + ) -> Coordinates: """Return a copy of this Coordinates object.""" - # TODO: improve implementation? - return self.to_dataset().copy(deep=deep).coords + variables = { + k: v._copy(deep=deep, memo=memo) for k, v in self.variables.items() + } + indexes = {k: v._copy(deep=deep, memo=memo) for k, v in self.xindexes.items()} + return Coordinates(coords=variables, indexes=indexes) class DatasetCoordinates(Coordinates): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 82899b0ca93..efa048281dc 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -367,7 +367,9 @@ def _initialize_feasible(lb, ub): def merge_data_and_coords(data_vars, coords): """Used in Dataset.__init__.""" - if not isinstance(coords, Coordinates): + if isinstance(coords, Coordinates): + coords = coords.copy() + else: coords = create_coords_with_default_indexes(coords, data_vars) return merge_core( diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 9eb5600883a..7ea23b43528 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -27,6 +27,9 @@ def test_init_from_coords(self) -> None: coords = Coordinates(coords=expected.coords) assert_identical(coords.to_dataset(), expected) + # test variables copied + assert coords.variables["foo"] is not expected.variables["foo"] + # default index expected = Dataset(coords={"x": ("x", [0, 1, 2])}) coords = Coordinates(coords=expected.coords, indexes=expected.xindexes) @@ -107,17 +110,15 @@ def test_merge_coords(self, coords) -> None: assert_identical(actual, expected, check_default_indexes=False) assert "y" not in actual.xindexes - def test_copy(self, coords) -> None: - copied = coords.copy() - assert_identical(coords.to_dataset(), copied.to_dataset()) - v0 = coords.variables["x"] - v1 = copied.variables["x"] + def test_copy(self) -> None: + no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) + copied = 
no_index_coords.copy() + assert_identical(no_index_coords, copied) + v0 = no_index_coords.variables["foo"] + v1 = copied.variables["foo"] assert v0 is not v1 assert source_ndarray(v0.data) is source_ndarray(v1.data) - # deep copy: use non-indexed coordinates - # (indexes are immutable so not deep-copied?) - no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) deep_copied = no_index_coords.copy(deep=True) assert_identical(no_index_coords.to_dataset(), deep_copied.to_dataset()) v0 = no_index_coords.variables["foo"] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6dcf9d08275..f79a56e19e9 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -600,6 +600,9 @@ class CustomIndex(Index): ds = Dataset(coords=coords) assert isinstance(ds.xindexes["x"], CustomIndex) + # test coordinate variables copied + assert ds.variables["x"] is not coords.variables["x"] + def test_properties(self) -> None: ds = create_test_data() From d01cf010e47049de1b0f3e131f608e82b01b02a8 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 17:10:56 +0100 Subject: [PATCH 40/69] another few tests --- xarray/tests/test_dataarray.py | 45 ++++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 19 ++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8184fe1955c..465391a6183 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -26,6 +26,7 @@ from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like +from xarray.core.coordinates import Coordinates from xarray.core.indexes import Index, PandasIndex, filter_indexes_from_coords from xarray.core.types import QueryEngineOptions, QueryParserOptions from xarray.core.utils import is_scalar @@ -465,6 +466,32 @@ def test_constructor_dask_coords(self) -> None: expected = DataArray(data, coords={"x": ecoord, "y": ecoord}, 
dims=["x", "y"]) assert_equal(actual, expected) + def test_constructor_no_default_index(self) -> None: + # explicitly passing a Coordinates object skips the creation of default index + da = DataArray(range(3), coords=Coordinates({"x": ("x", [1, 2, 3])})) + assert "x" in da.coords + assert "x" not in da.xindexes + + def test_constructor_multiindex(self) -> None: + midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two")) + coords = Coordinates.from_pandas_multiindex(midx, "x") + + da = DataArray(range(4), coords=coords, dims="x") + assert_identical(da.coords, coords) + + def test_constructor_custom_index(self) -> None: + class CustomIndex(Index): + ... + + coords = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + da = DataArray(range(3), coords=coords) + assert isinstance(da.xindexes["x"], CustomIndex) + + # test coordinate variables copied + assert da.coords["x"] is not coords.variables["x"] + def test_equals_and_identical(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") @@ -1504,6 +1531,24 @@ def test_assign_coords_existing_multiindex(self) -> None: with pytest.warns(FutureWarning, match=r"Updating MultiIndexed coordinate"): data.assign_coords(x=range(4)) + def test_assign_coords_custom_index(self) -> None: + class CustomIndex(Index): + pass + + coords = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + da = xr.DataArray([0, 1, 2], dims="x") + actual = da.assign_coords(coords) + assert isinstance(actual.xindexes["x"], CustomIndex) + + def test_assign_coords_no_default_index(self) -> None: + coords = Coordinates({"y": ("y", [1, 2, 3])}) + da = DataArray([1, 2, 3], dims="y") + actual = da.assign_coords(coords) + assert_identical(actual.coords, coords, check_default_indexes=False) + assert "y" not in actual.xindexes + def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])]) diff --git 
a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f79a56e19e9..6a5f57547fa 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4209,6 +4209,25 @@ class CustomIndex(PandasIndex): actual = ds.assign_coords(y=[4, 5, 6]) assert isinstance(actual.xindexes["x"], CustomIndex) + def test_assign_coords_custom_index(self) -> None: + class CustomIndex(Index): + pass + + coords = Coordinates( + coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()} + ) + ds = Dataset() + actual = ds.assign_coords(coords) + assert isinstance(actual.xindexes["x"], CustomIndex) + + def test_assign_coords_no_default_index(self) -> None: + coords = Coordinates({"y": ("y", [1, 2, 3])}) + ds = Dataset() + actual = ds.assign_coords(coords) + expected = coords.to_dataset() + assert_identical(expected, actual, check_default_indexes=False) + assert "y" not in actual.xindexes + def test_merge_multiindex_level(self) -> None: data = create_test_multiindex() From 9fc49fff305751100f1e21e17d480b099919384d Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 17:28:42 +0100 Subject: [PATCH 41/69] fix mypy --- xarray/tests/test_coordinates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 7ea23b43528..eea6b2ed569 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -104,9 +104,9 @@ def test_merge_coords(self, coords) -> None: expected = coords.merge(other).coords assert_identical(actual.to_dataset(), expected.to_dataset()) - other = Coordinates(other) - actual = coords.merge_coords(other) - expected = coords.merge(other).coords + other_coords = Coordinates(other) + actual = coords.merge_coords(other_coords) + expected = coords.merge(other_coords).coords assert_identical(actual, expected, check_default_indexes=False) assert "y" not in actual.xindexes From 7873c77c8cbbbeb49fa99638c9e60ccac5d4fb66 Mon Sep 17 00:00:00 
2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 17:28:54 +0100 Subject: [PATCH 42/69] update what's new --- doc/whats-new.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b66a0239fd3..37eb875ae78 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,18 @@ v2022.12.1 (unreleased) New Features ~~~~~~~~~~~~ +- :py:class:`Coordinates` can now be constructed independently of any Dataset or + DataArray (it is still returned from :py:attr:`Dataset.coords` and + :py:attrs:`DataArray.coords` properties). ``Coordinates`` objects are useful for + passing both coordinate variables and indexes to new Dataset / DataArray objects, + e.g., via their constructor or via :py:meth:`Dataset.assign_coords`. It is also + useful to wrap coordinate variables in a ``Coordinates`` object in order to skip + the creation of default (pandas) indexes for dimension coordinates. + The :py:class:`Coordinates.from_pandas_multiindex` constructor may be used to + create coordinates directly from a :py:class:`pandas.MultiIndex` object (it is + preferred over passing it directly as coordinate data, which may be deprecated soon). + (:issue:`6392`, :pull:`7368`). + By `Benoît Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ From f7ec33e0e913a8cd604735a8fd25772459ee0329 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 13 Dec 2022 17:58:11 +0100 Subject: [PATCH 43/69] do not copy indexes May corrupt multi-coordinate indexes. 
--- xarray/core/coordinates.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 241f4ef02ee..fc036f61f9d 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -534,11 +534,13 @@ def copy( self, deep: bool = False, memo: dict[int, Any] | None = None ) -> Coordinates: """Return a copy of this Coordinates object.""" + # do not copy indexes (may corrupt multi-coordinate indexes) + # TODO: disable variables deepcopy? it may also be problematic when they + # encapsulate index objects like pd.Index variables = { k: v._copy(deep=deep, memo=memo) for k, v in self.variables.items() } - indexes = {k: v._copy(deep=deep, memo=memo) for k, v in self.xindexes.items()} - return Coordinates(coords=variables, indexes=indexes) + return Coordinates(coords=variables, indexes=self.xindexes) class DatasetCoordinates(Coordinates): From b1a96882af8dec28282cf94c08197932b06cfedd Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 14 Dec 2022 13:21:02 +0100 Subject: [PATCH 44/69] add Coordinates fastpath constructor --- xarray/core/coordinates.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index fc036f61f9d..eb3c9fe3339 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -265,6 +265,24 @@ def __init__( coord_names=set(variables), variables=variables, indexes=indexes ) + @classmethod + def _construct_direct( + cls, + coords: dict[Any, Variable], + indexes: dict[Any, Index], + dims: dict[Any, int] | None = None, + ) -> Coordinates: + from xarray.core.dataset import Dataset + + obj = object.__new__(cls) + obj._data = Dataset._construct_direct( + coord_names=set(coords), + variables=coords, + indexes=indexes, + dims=dims, + ) + return obj + @classmethod def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: str) -> Coordinates: """Wrap a pandas multi-index as 
Xarray coordinates (dimension + levels). @@ -540,7 +558,9 @@ def copy( variables = { k: v._copy(deep=deep, memo=memo) for k, v in self.variables.items() } - return Coordinates(coords=variables, indexes=self.xindexes) + return Coordinates._construct_direct( + coords=variables, indexes=dict(self.xindexes), dims=dict(self.sizes) + ) class DatasetCoordinates(Coordinates): @@ -827,6 +847,6 @@ def create_coords_with_default_indexes( updated_coords.update(idx_vars) all_variables.update(idx_vars) else: - updated_coords[name] = obj + updated_coords[name] = variable - return Coordinates(coords=updated_coords, indexes=indexes) + return Coordinates._construct_direct(coords=updated_coords, indexes=indexes) From 38fdf1eb6cd43d10daf46685285a57102ded4c2f Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 14 Dec 2022 13:54:51 +0100 Subject: [PATCH 45/69] fix sphinx directive --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7d343965232..88b8a39308a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,7 +25,7 @@ New Features - :py:class:`Coordinates` can now be constructed independently of any Dataset or DataArray (it is still returned from :py:attr:`Dataset.coords` and - :py:attrs:`DataArray.coords` properties). ``Coordinates`` objects are useful for + :py:attr:`DataArray.coords` properties). ``Coordinates`` objects are useful for passing both coordinate variables and indexes to new Dataset / DataArray objects, e.g., via their constructor or via :py:meth:`Dataset.assign_coords`. 
It is also useful to wrap coordinate variables in a ``Coordinates`` object in order to skip From d9e9e341017329a40f59715141ab63dbf1a70021 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 14 Dec 2022 16:18:21 +0100 Subject: [PATCH 46/69] re-add coord indexes in merge (dataset constructor) This re-enables the optimization in deep_align that skips alignment for any alignable (DataArray) in a dict that matches an index key. --- xarray/core/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d7a65c856a0..02c954ea0e3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -377,6 +377,7 @@ def merge_data_and_coords(data_vars, coords): compat="broadcast_equals", join="outer", explicit_coords=tuple(coords), + indexes=coords.xindexes, ) From 3999efff76b4d61af49d275d8c6b00101b7ce1cc Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 14 Dec 2022 19:38:02 +0100 Subject: [PATCH 47/69] create coords with default idx: try a cleaner impl Coordinate variables and indexes extracted from DataArrays should be merged more properly. 
--- xarray/core/coordinates.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index eb3c9fe3339..39e9db1ea84 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -817,7 +817,7 @@ def create_coords_with_default_indexes( all_variables.update(data_vars) indexes = {} - updated_coords = {} + variables = {} # this is needed for backward compatibility: when a pandas multi-index # is given as data variable, it is promoted as index / level coordinates @@ -828,25 +828,31 @@ def create_coords_with_default_indexes( if k in coords or isinstance(v, pd.MultiIndex) } + dataarray_coords = [] + for name, obj in index_vars.items(): if isinstance(obj, DataArray): - # extract all coords/indexes from DataArray objects - # except if explicitly overwritten by DataArray data - for k, v in obj._coords.items(): - if k != name: - updated_coords[k] = v.copy() - if k in obj._indexes: - if name not in obj.xindexes.get_all_coords(k): - indexes[k] = obj._indexes[k] + dataarray_coords.append(obj.coords) variable = as_variable(obj, name=name) if variable.dims == (name,): idx, idx_vars = create_default_index_implicit(variable, all_variables) indexes.update({k: idx for k in idx_vars}) - updated_coords.update(idx_vars) + variables.update(idx_vars) all_variables.update(idx_vars) else: - updated_coords[name] = variable + variables[name] = variable + + new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes) + + # extract and merge coordinates and indexes from input DataArrays + if dataarray_coords: + prioritized = {k: (v, indexes.get(k, None)) for k, v in variables.items()} + variables, indexes = merge_coordinates_without_align( + dataarray_coords + [new_coords], + prioritized=prioritized, + ) + new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes) - return Coordinates._construct_direct(coords=updated_coords, indexes=indexes) + 
return new_coords From d5d823360b5067d0a1cc5727bda5fdb3867f1100 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 14 Dec 2022 20:28:19 +0100 Subject: [PATCH 48/69] some useful comments for later --- xarray/core/coordinates.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 39e9db1ea84..30597485cd4 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -810,6 +810,12 @@ def create_coords_with_default_indexes( coords: Mapping[Any, Any], data_vars: Mapping[Any, Variable] | None = None ) -> Coordinates: """Maybe create default indexes from a mapping of coordinates.""" + + # Note: data_vars are needed here only because a pd.MultiIndex object + # can be promoted as coordinates. + # TODO: It won't be relevant anymore when this behavior will be dropped + # in favor of the more explicit ``Coordinates.from_pandas_multiindex()``. + from xarray.core.dataarray import DataArray all_variables = dict(coords) @@ -821,7 +827,6 @@ def create_coords_with_default_indexes( # this is needed for backward compatibility: when a pandas multi-index # is given as data variable, it is promoted as index / level coordinates - # TODO: depreciate this implicit behavior index_vars = { k: v for k, v in all_variables.items() From d2fcaa3ef0c6800769a8c09d710763e5fc277bcc Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 14 Dec 2022 21:48:24 +0100 Subject: [PATCH 49/69] xr.merge: add support for Coordinates objects --- xarray/core/merge.py | 9 +++++++-- xarray/tests/test_merge.py | 7 +++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index f84fc636710..405146f2cfb 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -964,18 +964,23 @@ def merge( combine_nested combine_by_coords """ + + from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset 
dict_like_objects = [] for obj in objects: - if not isinstance(obj, (DataArray, Dataset, dict)): + if not isinstance(obj, (DataArray, Dataset, Coordinates, dict)): raise TypeError( "objects must be an iterable containing only " "Dataset(s), DataArray(s), and dictionaries." ) - obj = obj.to_dataset(promote_attrs=True) if isinstance(obj, DataArray) else obj + if isinstance(obj, DataArray): + obj = obj.to_dataset(promote_attrs=True) + elif isinstance(obj, Coordinates): + obj = obj.to_dataset() dict_like_objects.append(obj) merge_result = merge_core( diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 8957f9c829a..63449708a79 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -235,6 +235,13 @@ def test_merge_dicts_dims(self): expected = xr.Dataset({"x": [12], "y": ("x", [13])}) assert_identical(actual, expected) + def test_merge_coordinates(self): + coords1 = xr.Coordinates({"x": ("x", [0, 1, 2])}) + coords2 = xr.Coordinates({"y": ("y", [3, 4, 5])}) + expected = xr.Dataset(coords={"x": [0, 1, 2], "y": [3, 4, 5]}) + actual = xr.merge([coords1, coords2]) + assert_identical(actual, expected) + def test_merge_error(self): ds = xr.Dataset({"x": 0}) with pytest.raises(xr.MergeError): From 193dad3393565b6c007c0eb0a2d47b5ade874571 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 15 Dec 2022 10:15:18 +0100 Subject: [PATCH 50/69] allow skip align for object(s) in merge_core This fixes the decrease in performance observed in Dataset creation benchmarks. When creating a new Dataset, the variables and indexes in `Coordinates` should already be aligned together so it doesn't need to go through the complex alignment logic once again. `Coordinates` indexes are still used to align data variables. 
--- xarray/core/dataset.py | 4 ++++ xarray/core/merge.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 02c954ea0e3..c6618c80ee7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -372,12 +372,16 @@ def merge_data_and_coords(data_vars, coords): else: coords = create_coords_with_default_indexes(coords, data_vars) + # exclude coords from alignment (all variables in a Coordinates object should + # already be aligned together) and use coordinates' indexes to align data_vars return merge_core( [data_vars, coords], compat="broadcast_equals", join="outer", explicit_coords=tuple(coords), indexes=coords.xindexes, + priority_arg=1, + skip_align_args=[1], ) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 405146f2cfb..d18d06cc41d 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -658,6 +658,7 @@ def merge_core( explicit_coords: Sequence | None = None, indexes: Mapping[Any, Any] | None = None, fill_value: object = dtypes.NA, + skip_align_args: list[int] | None = None, ) -> _MergeResult: """Core logic for merging labeled objects. @@ -683,6 +684,8 @@ def merge_core( may be cast to pandas.Index objects. fill_value : scalar, optional Value to use for newly missing values + skip_align_args : list of int, optional + Optional arguments in `objects` that are not included in alignment. 
Returns ------- @@ -704,10 +707,20 @@ def merge_core( _assert_compat_valid(compat) + objects = list(objects) + if skip_align_args is None: + skip_align_args = [] + + skip_align_objs = [(pos, objects.pop(pos)) for pos in skip_align_args] + coerced = coerce_pandas_values(objects) aligned = deep_align( coerced, join=join, copy=False, indexes=indexes, fill_value=fill_value ) + + for pos, obj in skip_align_objs: + aligned.insert(pos, obj) + collected = collect_variables_and_indexes(aligned, indexes=indexes) prioritized = _get_priority_vars_and_indexes(aligned, priority_arg, compat=compat) variables, out_indexes = merge_collected( From 84c77a4626e6342c4c0cf6b31b78198bc645d389 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 15 Dec 2022 10:37:23 +0100 Subject: [PATCH 51/69] fix mypy --- xarray/core/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 30597485cd4..493671d75a6 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -822,8 +822,8 @@ def create_coords_with_default_indexes( if data_vars is not None: all_variables.update(data_vars) - indexes = {} - variables = {} + indexes: dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} # this is needed for backward compatibility: when a pandas multi-index # is given as data variable, it is promoted as index / level coordinates From 5e82d61cee1045ab2c989e6418495d8b53dfaf41 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 15 Dec 2022 10:47:07 +0100 Subject: [PATCH 52/69] what's new tweaks --- doc/whats-new.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 88b8a39308a..bd7ee741964 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,15 +24,17 @@ New Features ~~~~~~~~~~~~ - :py:class:`Coordinates` can now be constructed independently of any Dataset or - DataArray (it is still returned from 
:py:attr:`Dataset.coords` and + DataArray (it is also returned by the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords` properties). ``Coordinates`` objects are useful for passing both coordinate variables and indexes to new Dataset / DataArray objects, - e.g., via their constructor or via :py:meth:`Dataset.assign_coords`. It is also - useful to wrap coordinate variables in a ``Coordinates`` object in order to skip - the creation of default (pandas) indexes for dimension coordinates. + e.g., via their constructor or via :py:meth:`Dataset.assign_coords`. We may also + wrap coordinate variables in a ``Coordinates`` object in order to skip + the automatic creation of (pandas) indexes for dimension coordinates. The :py:class:`Coordinates.from_pandas_multiindex` constructor may be used to create coordinates directly from a :py:class:`pandas.MultiIndex` object (it is preferred over passing it directly as coordinate data, which may be deprecated soon). + Like Dataset and DataArray objects, ``Coordinates`` objects may now be used in + :py:func:`align` and :py:func:`merge`. (:issue:`6392`, :pull:`7368`). By `Benoît Bovy `_. 
From c6409fd01192a04c13a5df05a4053c124971da90 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 15 Dec 2022 10:55:43 +0100 Subject: [PATCH 53/69] align Coordinates callbacks: don't reindex data vars --- xarray/core/coordinates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 493671d75a6..14916ff4d63 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -517,7 +517,7 @@ def _overwrite_indexes( drop_coords: list[Hashable] | None = None, rename_dims: Mapping[Any, Any] | None = None, ) -> Coordinates: - results = self._data._overwrite_indexes( + results = self.to_dataset()._overwrite_indexes( indexes, coords, drop_coords, rename_dims ) return results.coords @@ -533,7 +533,7 @@ def _reindex_callback( exclude_vars: frozenset[Hashable], ) -> Coordinates: """Callback called from ``Aligner`` to create a new reindexed Coordinates.""" - aligned = self._data._reindex_callback( + aligned = self.to_dataset()._reindex_callback( aligner, dim_pos_indexers, variables, From 39294fcc17a208c0f17e35afe9a9705e1d715e98 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 15 Dec 2022 11:13:55 +0100 Subject: [PATCH 54/69] fix Coordinates._overwrite_indexes callback mypy was rightfully complaining. This callback is called from Aligner only, which passes the first two arguments and ignores the rest. 
--- xarray/core/coordinates.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 14916ff4d63..7e0a2530c57 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -514,12 +514,8 @@ def _overwrite_indexes( self, indexes: Mapping[Any, Index], coords: Mapping[Any, Variable] | None = None, - drop_coords: list[Hashable] | None = None, - rename_dims: Mapping[Any, Any] | None = None, ) -> Coordinates: - results = self.to_dataset()._overwrite_indexes( - indexes, coords, drop_coords, rename_dims - ) + results = self.to_dataset()._overwrite_indexes(indexes, coords) return results.coords def _reindex_callback( From 8c65f85813d904c11faee865c0be08a02def055b Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 13 Jan 2023 15:09:55 +0100 Subject: [PATCH 55/69] remove merge_coords --- xarray/core/coordinates.py | 31 ------------------------------- xarray/tests/test_coordinates.py | 12 ------------ 2 files changed, 43 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 7e0a2530c57..2242a27572b 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -456,37 +456,6 @@ def merge(self, other: Mapping[Any, Any] | None) -> Dataset: variables=coords, coord_names=coord_names, indexes=indexes ) - def merge_coords(self, other: Mapping[Any, Any] | None = None) -> Coordinates: - """Merge two sets of coordinates to create a new :py:class:`Coordinates` - object. - - The method implements the logic used for joining coordinates in the - result of a binary operation performed on xarray objects: - - - If two index coordinates conflict (are not equal), an exception is - raised. You must align your data before passing it to this method. - - If an index coordinate and a non-index coordinate conflict, the non- - index coordinate is dropped. - - If two non-index coordinates conflict, both are dropped. 
- - Parameters - ---------- - other : dict-like, optional - A :py:class:`Coordinates` object or any mapping that can be turned - into coordinates. - - Returns - ------- - merged : Coordinates - A new Coordinates object with merged coordinates. - """ - from xarray.core.dataset import Dataset - - if not isinstance(other, Coordinates): - other = Dataset(coords=other).coords - - return self.merge(other).coords - def __setitem__(self, key: Hashable, value: Any) -> None: self.update({key: value}) diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index eea6b2ed569..26cecf56959 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -98,18 +98,6 @@ def test_identical(self, coords): assert coords.identical(coords) assert not coords.identical("no_a_coords") - def test_merge_coords(self, coords) -> None: - other = {"y": ("y", [4, 5, 6])} - actual = coords.merge_coords(other) - expected = coords.merge(other).coords - assert_identical(actual.to_dataset(), expected.to_dataset()) - - other_coords = Coordinates(other) - actual = coords.merge_coords(other_coords) - expected = coords.merge(other_coords).coords - assert_identical(actual, expected, check_default_indexes=False) - assert "y" not in actual.xindexes - def test_copy(self) -> None: no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])}) copied = no_index_coords.copy() From cf6fcbb19c6ffb4274724607455d67a6fc5c8af3 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 13 Jan 2023 15:36:43 +0100 Subject: [PATCH 56/69] futurewarning: pass multi-index via data vars --- xarray/core/coordinates.py | 25 +++++++++++++++++++++---- xarray/tests/test_dataset.py | 5 +++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 2242a27572b..a2d878983a1 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -790,9 +790,26 @@ def create_coords_with_default_indexes( indexes: dict[Hashable, 
Index] = {} variables: dict[Hashable, Variable] = {} - # this is needed for backward compatibility: when a pandas multi-index - # is given as data variable, it is promoted as index / level coordinates - index_vars = { + maybe_index_vars: dict[Hashable, Variable] = {} + mindex_data_vars: list[Hashable] = [] + + for k, v in all_variables.items(): + if k in coords: + maybe_index_vars[k] = v + elif isinstance(v, pd.MultiIndex): + # TODO: eventually stop promoting multi-index passed via data variables + mindex_data_vars.append(k) + maybe_index_vars[k] = v + + if mindex_data_vars: + warnings.warn( + f"passing one or more `pandas.MultiIndex` via data variable(s) {mindex_data_vars} " + "will no longer create indexed coordinates in the future. " + "If you want to keep this behavior, pass it as coordinates instead.", + FutureWarning, + ) + + maybe_index_vars = { k: v for k, v in all_variables.items() if k in coords or isinstance(v, pd.MultiIndex) @@ -800,7 +817,7 @@ def create_coords_with_default_indexes( dataarray_coords = [] - for name, obj in index_vars.items(): + for name, obj in maybe_index_vars.items(): if isinstance(obj, DataArray): dataarray_coords.append(obj.coords) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eb9b4024e16..105bb9da234 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -590,6 +590,11 @@ def test_constructor_multiindex(self) -> None: ds = Dataset(coords=coords) assert_identical(ds, coords.to_dataset()) + with pytest.warns( + FutureWarning, match=".*`pandas.MultiIndex` via data variable.*" + ): + Dataset(data_vars={"x": midx}) + def test_constructor_custom_index(self) -> None: class CustomIndex(Index): ... 
From 6a6444f81a3db97813cf56966b308af9b958c254 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 13 Jan 2023 15:52:23 +0100 Subject: [PATCH 57/69] review comments --- xarray/core/coordinates.py | 4 +-- xarray/testing.py | 14 ++++------ xarray/tests/test_coordinates.py | 47 ++++++++++++++++++++++---------- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index a2d878983a1..86c1aed4d27 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -254,7 +254,7 @@ def __init__( for k, idx in indexes.items(): if not isinstance(idx, Index): - raise TypeError(f"'{k}' is not an Xarray Index") + raise TypeError(f"'{k}' is not an `xarray.indexes.Index` object") # maybe convert to base variable for k, v in variables.items(): @@ -815,7 +815,7 @@ def create_coords_with_default_indexes( if k in coords or isinstance(v, pd.MultiIndex) } - dataarray_coords = [] + dataarray_coords: list[DataArrayCoordinates] = [] for name, obj in maybe_index_vars.items(): if isinstance(obj, DataArray): diff --git a/xarray/testing.py b/xarray/testing.py index 7ca87471436..9cbd804200e 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -79,10 +79,9 @@ def assert_equal(a, b): numpy.testing.assert_array_equal """ __tracebackhide__ = True - try: - assert type(a) == type(b) - except AssertionError: - assert isinstance(a, Coordinates) and isinstance(b, Coordinates) + assert ( + type(a) == type(b) or isinstance(a, Coordinates) and isinstance(b, Coordinates) + ) if isinstance(a, (Variable, DataArray)): assert a.equals(b), formatting.diff_array_repr(a, b, "equals") elif isinstance(a, Dataset): @@ -112,10 +111,9 @@ def assert_identical(a, b): assert_equal, assert_allclose, Dataset.equals, DataArray.equals """ __tracebackhide__ = True - try: - assert type(a) == type(b) - except AssertionError: - assert isinstance(a, Coordinates) and isinstance(b, Coordinates) + assert ( + type(a) == type(b) or isinstance(a, 
Coordinates) and isinstance(b, Coordinates) + ) if isinstance(a, Variable): assert a.identical(b), formatting.diff_array_repr(a, b, "identical") elif isinstance(a, DataArray): diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 26cecf56959..bf68a5c1838 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -12,11 +12,6 @@ class TestCoordinates: - @pytest.fixture - def coords(self) -> Coordinates: - ds = Dataset(coords={"x": [0, 1, 2]}) - return Coordinates(coords=ds.coords, indexes=ds.xindexes) - def test_init_noindex(self) -> None: coords = Coordinates(coords={"foo": ("x", [0, 1, 2])}) expected = Dataset(coords={"foo": ("x", [0, 1, 2])}) @@ -44,7 +39,7 @@ def test_init_index_error(self) -> None: with pytest.raises(ValueError, match="no coordinate variables found"): Coordinates(indexes={"x": idx}) - with pytest.raises(TypeError, match=".* is not an Xarray Index"): + with pytest.raises(TypeError, match=".* is not an `xarray.indexes.Index`"): Coordinates(coords={"x": ("x", [1, 2, 3])}, indexes={"x": "not_an_xarray_index"}) # type: ignore def test_init_dim_sizes_conflict(self) -> None: @@ -64,37 +59,56 @@ def test_from_pandas_multiindex(self) -> None: for name in ("x", "one", "two"): assert_identical(expected[name], coords.variables[name]) - def test_dims(self, coords) -> None: + def test_dims(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) assert coords.dims == {"x": 3} - def test_sizes(self, coords) -> None: + def test_sizes(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) assert coords.sizes == {"x": 3} - def test_dtypes(self, coords) -> None: + def test_dtypes(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) assert coords.dtypes == {"x": int} - def test_getitem(self, coords) -> None: + 
def test_getitem(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) assert_identical( coords["x"], DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"), ) - def test_delitem(self, coords) -> None: + def test_delitem(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) del coords["x"] assert "x" not in coords - def test_update(self, coords) -> None: + def test_update(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + coords.update({"y": ("y", [4, 5, 6])}) assert "y" in coords assert "y" in coords.xindexes expected = DataArray([4, 5, 6], coords={"y": [4, 5, 6]}, name="y") assert_identical(coords["y"], expected) - def test_equals(self, coords): + def test_equals(self): + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + assert coords.equals(coords) assert not coords.equals("no_a_coords") - def test_identical(self, coords): + def test_identical(self): + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + assert coords.identical(coords) assert not coords.identical("no_a_coords") @@ -114,7 +128,10 @@ def test_copy(self) -> None: assert v0 is not v1 assert source_ndarray(v0.data) is not source_ndarray(v1.data) - def test_align(self, coords) -> None: + def test_align(self) -> None: + _ds = Dataset(coords={"x": [0, 1, 2]}) + coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes) + left = coords # test Coordinates._reindex_callback From 1759ac9724f5d7b1b80e77348ae401376d043a76 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Jul 2023 17:05:13 +0000 Subject: [PATCH 58/69] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
xarray/core/coordinates.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index bd22c0ac891..b870924efec 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -7,11 +7,6 @@ TYPE_CHECKING, Any, Generic, - Hashable, - Iterator, - List, - Mapping, - Sequence, ) import numpy as np From 48f695090e8be2374848bc682229c42a7c7396d6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Fri, 14 Jul 2023 19:27:11 +0200 Subject: [PATCH 59/69] Fix circulat imports --- xarray/core/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 9f2bac5ad27..5d9630d9240 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -14,7 +14,6 @@ from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed from xarray.core.options import OPTIONS, _get_keep_attrs from xarray.core.parallelcompat import get_chunked_array_type, guess_chunkmanager -from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.pycompat import is_chunked_array from xarray.core.utils import ( Frozen, @@ -961,6 +960,7 @@ def _resample( from xarray.core.dataarray import DataArray from xarray.core.groupby import ResolvedTimeResampleGrouper, TimeResampleGrouper from xarray.core.resample import RESAMPLE_DIM + from xarray.core.pdcompat import _convert_base_to_offset if keep_attrs is not None: warnings.warn( From fa384f7f7a253ec6a32aec07e51e33d69d41f969 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Jul 2023 17:28:38 +0000 Subject: [PATCH 60/69] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 5d9630d9240..2bd91b56bda 100644 --- 
a/xarray/core/common.py +++ b/xarray/core/common.py @@ -959,8 +959,8 @@ def _resample( from xarray.core.dataarray import DataArray from xarray.core.groupby import ResolvedTimeResampleGrouper, TimeResampleGrouper - from xarray.core.resample import RESAMPLE_DIM from xarray.core.pdcompat import _convert_base_to_offset + from xarray.core.resample import RESAMPLE_DIM if keep_attrs is not None: warnings.warn( From 7628cb25ac3d0e9e4a1a873e09307b95a85d3f3d Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 17 Jul 2023 21:02:56 +0200 Subject: [PATCH 61/69] typing: add Alignable protocol class --- xarray/core/alignment.py | 44 ++++++++++---------- xarray/core/coordinates.py | 43 +++++++++++--------- xarray/core/dataarray.py | 8 ++-- xarray/core/dataset.py | 4 +- xarray/core/types.py | 83 +++++++++++++++++++++++++++++++------- 5 files changed, 120 insertions(+), 62 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index bd403093d1c..39ff878b56d 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -5,13 +5,12 @@ from collections import defaultdict from collections.abc import Hashable, Iterable, Mapping from contextlib import suppress -from typing import TYPE_CHECKING, Any, Callable, Generic, TypeVar, cast +from typing import TYPE_CHECKING, Any, Callable, Generic, cast import numpy as np import pandas as pd from xarray.core import dtypes -from xarray.core.common import DataWithCoords from xarray.core.indexes import ( Index, Indexes, @@ -20,15 +19,14 @@ indexes_all_equal, safe_cast_to_index, ) +from xarray.core.types import T_Alignable from xarray.core.utils import is_dict_like, is_full_slice from xarray.core.variable import Variable, as_compatible_data, calculate_dimensions if TYPE_CHECKING: from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - from xarray.core.types import JoinOptions, T_DataArray, T_Dataset, T_DataWithCoords - -DataAlignable = TypeVar("DataAlignable", bound=DataWithCoords) + 
from xarray.core.types import JoinOptions, T_DataArray, T_Dataset def reindex_variables( @@ -92,7 +90,7 @@ def reindex_variables( NormalizedIndexVars = dict[MatchingIndexKey, dict[Hashable, Variable]] -class Aligner(Generic[DataAlignable]): +class Aligner(Generic[T_Alignable]): """Implements all the complex logic for the re-indexing and alignment of Xarray objects. @@ -105,8 +103,8 @@ class Aligner(Generic[DataAlignable]): """ - objects: tuple[DataAlignable, ...] - results: tuple[DataAlignable, ...] + objects: tuple[T_Alignable, ...] + results: tuple[T_Alignable, ...] objects_matching_indexes: tuple[dict[MatchingIndexKey, Index], ...] join: str exclude_dims: frozenset[Hashable] @@ -127,7 +125,7 @@ class Aligner(Generic[DataAlignable]): def __init__( self, - objects: Iterable[DataAlignable], + objects: Iterable[T_Alignable], join: str = "inner", indexes: Mapping[Any, Any] | None = None, exclude_dims: Iterable = frozenset(), @@ -510,7 +508,7 @@ def _get_dim_pos_indexers( def _get_indexes_and_vars( self, - obj: DataAlignable, + obj: T_Alignable, matching_indexes: dict[MatchingIndexKey, Index], ) -> tuple[dict[Hashable, Index], dict[Hashable, Variable]]: new_indexes = {} @@ -533,9 +531,9 @@ def _get_indexes_and_vars( def _reindex_one( self, - obj: DataAlignable, + obj: T_Alignable, matching_indexes: dict[MatchingIndexKey, Index], - ) -> DataAlignable: + ) -> T_Alignable: new_indexes, new_variables = self._get_indexes_and_vars(obj, matching_indexes) dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes) @@ -579,13 +577,13 @@ def align(self) -> None: def align( - *objects: DataAlignable, + *objects: T_Alignable, join: JoinOptions = "inner", copy: bool = True, indexes=None, exclude=frozenset(), fill_value=dtypes.NA, -) -> tuple[DataAlignable, ...]: +) -> tuple[T_Alignable, ...]: """ Given any number of Dataset and/or DataArray objects, returns new objects with aligned indexes and dimension sizes. 
@@ -865,7 +863,7 @@ def is_alignable(obj): def reindex( - obj: DataAlignable, + obj: T_Alignable, indexers: Mapping[Any, Any], method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, @@ -873,7 +871,7 @@ def reindex( fill_value: Any = dtypes.NA, sparse: bool = False, exclude_vars: Iterable[Hashable] = frozenset(), -) -> DataAlignable: +) -> T_Alignable: """Re-index either a Dataset or a DataArray. Not public API. @@ -904,13 +902,13 @@ def reindex( def reindex_like( - obj: DataAlignable, + obj: T_Alignable, other: Dataset | DataArray, method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, -) -> DataAlignable: +) -> T_Alignable: """Re-index either a Dataset or a DataArray like another Dataset/DataArray. Not public API. @@ -952,8 +950,8 @@ def _get_broadcast_dims_map_common_coords(args, exclude): def _broadcast_helper( - arg: T_DataWithCoords, exclude, dims_map, common_coords -) -> T_DataWithCoords: + arg: T_Alignable, exclude, dims_map, common_coords +) -> T_Alignable: from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -983,16 +981,16 @@ def _broadcast_dataset(ds: T_Dataset) -> T_Dataset: # remove casts once https://github.com/python/mypy/issues/12800 is resolved if isinstance(arg, DataArray): - return cast("T_DataWithCoords", _broadcast_array(arg)) + return cast(T_Alignable, _broadcast_array(arg)) elif isinstance(arg, Dataset): - return cast("T_DataWithCoords", _broadcast_dataset(arg)) + return cast(T_Alignable, _broadcast_dataset(arg)) else: raise ValueError("all input must be Dataset or DataArray objects") # TODO: this typing is too restrictive since it cannot deal with mixed # DataArray and Dataset types...? Is this a problem? 
-def broadcast(*args: T_DataWithCoords, exclude=None) -> tuple[T_DataWithCoords, ...]: +def broadcast(*args: T_Alignable, exclude=None) -> tuple[T_Alignable, ...]: """Explicitly broadcast any number of DataArray or Dataset objects against one another. diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index b870924efec..9033bb2a6a4 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -7,6 +7,7 @@ TYPE_CHECKING, Any, Generic, + cast, ) import numpy as np @@ -22,7 +23,7 @@ create_default_index_implicit, ) from xarray.core.merge import merge_coordinates_without_align, merge_coords -from xarray.core.types import T_DataArray +from xarray.core.types import T_Coordinates, T_DataArray from xarray.core.utils import Frozen, ReprObject from xarray.core.variable import Variable, as_variable, calculate_dimensions @@ -48,7 +49,7 @@ def _names(self) -> set[Hashable]: raise NotImplementedError() @property - def dims(self) -> Mapping[Hashable, int] | tuple[Hashable, ...]: + def dims(self) -> Frozen[Hashable, int] | tuple[Hashable, ...]: raise NotImplementedError() @property @@ -217,7 +218,7 @@ def __init__( self, coords: Mapping[Any, Any] | None = None, indexes: Mapping[Any, Index] | None = None, - ): + ) -> None: # When coordinates are constructed directly, an internal Dataset is # created so that it is compatible with the DatasetCoordinates and # DataArrayCoordinates classes serving as a proxy for the data. 
@@ -257,11 +258,11 @@ def __init__( @classmethod def _construct_direct( - cls, + cls: type[T_Coordinates], coords: dict[Any, Variable], indexes: dict[Any, Index], dims: dict[Any, int] | None = None, - ) -> Coordinates: + ) -> T_Coordinates: from xarray.core.dataset import Dataset obj = object.__new__(cls) @@ -274,7 +275,9 @@ def _construct_direct( return obj @classmethod - def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: str) -> Coordinates: + def from_pandas_multiindex( + cls: type[T_Coordinates], midx: pd.MultiIndex, dim: str + ) -> T_Coordinates: """Wrap a pandas multi-index as Xarray coordinates (dimension + levels). The returned coordinates can be directly assigned to a @@ -306,7 +309,7 @@ def _names(self) -> set[Hashable]: return self._data._coord_names @property - def dims(self) -> Mapping[Hashable, int] | tuple[Hashable, ...]: + def dims(self) -> Frozen[Hashable, int] | tuple[Hashable, ...]: """Mapping from dimension names to lengths or tuple of dimension names.""" return self._data.dims @@ -470,15 +473,15 @@ def update(self, other: Mapping[Any, Any]) -> None: self._update_coords(coords, indexes) def _overwrite_indexes( - self, + self: T_Coordinates, indexes: Mapping[Any, Index], - coords: Mapping[Any, Variable] | None = None, - ) -> Coordinates: - results = self.to_dataset()._overwrite_indexes(indexes, coords) - return results.coords + variables: Mapping[Any, Variable] | None = None, + ) -> T_Coordinates: + results = self.to_dataset()._overwrite_indexes(indexes, variables) + return cast(T_Coordinates, results.coords) def _reindex_callback( - self, + self: T_Coordinates, aligner: Aligner, dim_pos_indexers: dict[Hashable, Any], variables: dict[Hashable, Variable], @@ -486,7 +489,7 @@ def _reindex_callback( fill_value: Any, exclude_dims: frozenset[Hashable], exclude_vars: frozenset[Hashable], - ) -> Coordinates: + ) -> T_Coordinates: """Callback called from ``Aligner`` to create a new reindexed Coordinates.""" aligned = 
self.to_dataset()._reindex_callback( aligner, @@ -497,15 +500,17 @@ def _reindex_callback( exclude_dims, exclude_vars, ) - return aligned.coords + return cast(T_Coordinates, aligned.coords) def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" return self._data._ipython_key_completions_() def copy( - self, deep: bool = False, memo: dict[int, Any] | None = None - ) -> Coordinates: + self: T_Coordinates, + deep: bool = False, + memo: dict[int, Any] | None = None, + ) -> T_Coordinates: """Return a copy of this Coordinates object.""" # do not copy indexes (may corrupt multi-coordinate indexes) # TODO: disable variables deepcopy? it may also be problematic when they @@ -513,7 +518,7 @@ def copy( variables = { k: v._copy(deep=deep, memo=memo) for k, v in self.variables.items() } - return Coordinates._construct_direct( + return type(self)._construct_direct( coords=variables, indexes=dict(self.xindexes), dims=dict(self.sizes) ) @@ -538,7 +543,7 @@ def _names(self) -> set[Hashable]: return self._data._coord_names @property - def dims(self) -> Mapping[Hashable, int]: + def dims(self) -> Frozen[Hashable, int]: return self._data.dims @property diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f7a5860017a..2d3694cad75 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -520,7 +520,7 @@ def _replace_maybe_drop_dims( def _overwrite_indexes( self: T_DataArray, indexes: Mapping[Any, Index], - coords: Mapping[Any, Variable] | None = None, + variables: Mapping[Any, Variable] | None = None, drop_coords: list[Hashable] | None = None, rename_dims: Mapping[Any, Any] | None = None, ) -> T_DataArray: @@ -528,8 +528,8 @@ def _overwrite_indexes( if not indexes: return self - if coords is None: - coords = {} + if variables is None: + variables = {} if drop_coords is None: drop_coords = [] @@ -538,7 +538,7 @@ def _overwrite_indexes( new_indexes = dict(self._indexes) for name in indexes: - new_coords[name] = 
coords[name] + new_coords[name] = variables[name] new_indexes[name] = indexes[name] for name in drop_coords: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 660b3bcc8b3..d1a6702b646 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3135,7 +3135,7 @@ def broadcast_like( ) def _reindex_callback( - self, + self: T_Dataset, aligner: alignment.Aligner, dim_pos_indexers: dict[Hashable, Any], variables: dict[Hashable, Variable], @@ -3143,7 +3143,7 @@ def _reindex_callback( fill_value: Any, exclude_dims: frozenset[Hashable], exclude_vars: frozenset[Hashable], - ) -> Dataset: + ) -> T_Dataset: """Callback called from ``Aligner`` to create a new reindexed Dataset.""" new_variables = variables.copy() diff --git a/xarray/core/types.py b/xarray/core/types.py index f3342071107..6afdd5b84a1 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -1,12 +1,13 @@ from __future__ import annotations import datetime -from collections.abc import Hashable, Iterable, Sequence +from collections.abc import Hashable, Iterable, Iterator, Mapping, Sequence from typing import ( TYPE_CHECKING, Any, Callable, Literal, + Protocol, SupportsIndex, TypeVar, Union, @@ -21,11 +22,14 @@ from numpy.typing import ArrayLike from xarray.backends.common import BackendEntrypoint + from xarray.core.alignment import Aligner from xarray.core.common import AbstractArray, DataWithCoords + from xarray.core.coordinates import Coordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.groupby import DataArrayGroupBy, GroupBy - from xarray.core.indexes import Index + from xarray.core.indexes import Index, Indexes + from xarray.core.utils import Frozen from xarray.core.variable import Variable try: @@ -43,18 +47,15 @@ except ImportError: ZarrArray = np.ndarray - # TODO: Turn on when https://github.com/python/mypy/issues/11871 is fixed. - # Can be uncommented if using pyright though. 
- # import sys - - # try: - # if sys.version_info >= (3, 11): - # from typing import Self - # else: - # from typing_extensions import Self - # except ImportError: - # Self: Any = None - Self: Any = None + import sys + + if sys.version_info >= (3, 11): + from typing import Self + else: + try: + from typing_extensions import Self + except ImportError: + Self: Any = None # Anything that can be coerced to a shape tuple _ShapeLike = Union[SupportsIndex, Sequence[SupportsIndex]] @@ -93,10 +94,63 @@ DTypeLikeSave: Any = None +class Alignable(Protocol): + """Represents any Xarray type that supports alignment. + + It may be ``Dataset``, ``DataArray`` or ``Coordinates``. This protocol class + is needed since those types do not all have a common base class. + + """ + + @property + def dims(self) -> Frozen[Hashable, int] | tuple[Hashable, ...]: + ... + + @property + def sizes(self) -> Frozen[Hashable, int]: + ... + + @property + def xindexes(self) -> Indexes[Index]: + ... + + def _reindex_callback( + self, + aligner: Aligner, + dim_pos_indexers: dict[Hashable, Any], + variables: dict[Hashable, Variable], + indexes: dict[Hashable, Index], + fill_value: Any, + exclude_dims: frozenset[Hashable], + exclude_vars: frozenset[Hashable], + ) -> Self: + ... + + def _overwrite_indexes( + self, + indexes: Mapping[Any, Index], + variables: Mapping[Any, Variable] | None = None, + ) -> Self: + ... + + def __len__(self) -> int: + ... + + def __iter__(self) -> Iterator[Hashable]: + ... + + def copy( + self, + deep: bool = False, + ) -> Self: + ... 
+ + T_Backend = TypeVar("T_Backend", bound="BackendEntrypoint") T_Dataset = TypeVar("T_Dataset", bound="Dataset") T_DataArray = TypeVar("T_DataArray", bound="DataArray") T_Variable = TypeVar("T_Variable", bound="Variable") +T_Coordinates = TypeVar("T_Coordinates", bound="Coordinates") T_Array = TypeVar("T_Array", bound="AbstractArray") T_Index = TypeVar("T_Index", bound="Index") @@ -105,6 +159,7 @@ # Maybe we rename this to T_Data or something less Fortran-y? T_Xarray = TypeVar("T_Xarray", "DataArray", "Dataset") T_DataWithCoords = TypeVar("T_DataWithCoords", bound="DataWithCoords") +T_Alignable = TypeVar("T_Alignable", bound="Alignable") ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"] DsCompatible = Union["Dataset", "DataArray", "Variable", "GroupBy", "ScalarOrArray"] From c8821f9c43ed8b3e03bb26582adc4ac94ba429f2 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 17 Jul 2023 21:28:08 +0200 Subject: [PATCH 62/69] try fixing mypy error (Self redefinition) --- xarray/core/types.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index 6afdd5b84a1..fec257d9310 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime +import sys from collections.abc import Hashable, Iterable, Iterator, Mapping, Sequence from typing import ( TYPE_CHECKING, @@ -17,6 +18,17 @@ import pandas as pd from packaging.version import Version +try: + if sys.version_info >= (3, 11): + from typing import Self + else: + from typing_extensions import Self +except ImportError: + if TYPE_CHECKING: + raise + else: + Self: Any = None + if TYPE_CHECKING: from numpy._typing import _SupportsDType from numpy.typing import ArrayLike @@ -47,16 +59,6 @@ except ImportError: ZarrArray = np.ndarray - import sys - - if sys.version_info >= (3, 11): - from typing import Self - else: - try: - from typing_extensions import Self - except 
ImportError: - Self: Any = None - # Anything that can be coerced to a shape tuple _ShapeLike = Union[SupportsIndex, Sequence[SupportsIndex]] _DTypeLikeNested = Any # TODO: wait for support for recursive types @@ -90,7 +92,6 @@ CFTimeDatetime = Any DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime] else: - Self: Any = None DTypeLikeSave: Any = None From c71aadb19aa93fa4fe7770c7c4e2f7f9dc3dfcec Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 17 Jul 2023 21:30:27 +0200 Subject: [PATCH 63/69] remove Coordinate alias of Variable Much water has flowed under the bridge since it has been renamed. --- xarray/__init__.py | 2 +- xarray/core/variable.py | 4 ---- xarray/tests/test_variable.py | 7 +------ 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index d726cf57924..492a152d71b 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -36,7 +36,7 @@ from xarray.core.merge import Context, MergeError, merge from xarray.core.options import get_options, set_options from xarray.core.parallel import map_blocks -from xarray.core.variable import Coordinate, IndexVariable, Variable, as_variable +from xarray.core.variable import IndexVariable, Variable, as_variable from xarray.util.print_versions import show_versions try: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9271d0c4dbd..06f0f6ff4f6 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -3138,10 +3138,6 @@ def _inplace_binary_op(self, other, f): ) -# for backwards compatibility -Coordinate = utils.alias(IndexVariable, "Coordinate") - - def _unified_dims(variables): # validate dimensions all_dims = {} diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index a6dffb82660..77a682f4a3b 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -12,7 +12,7 @@ import pytz from packaging.version import Version -from xarray import Coordinate, DataArray, 
Dataset, IndexVariable, Variable, set_options +from xarray import DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.common import full_like, ones_like, zeros_like from xarray.core.indexing import ( @@ -2445,11 +2445,6 @@ def test_concat_str_dtype(self, dtype): assert actual.identical(expected) assert np.issubdtype(actual.dtype, dtype) - def test_coordinate_alias(self): - with pytest.warns(Warning, match="deprecated"): - x = Coordinate("x", [1, 2, 3]) - assert isinstance(x, IndexVariable) - def test_datetime64(self): # GH:1932 Make sure indexing keeps precision t = np.array([1518418799999986560, 1518418799999996560], dtype="datetime64[ns]") From 139b13a0b6e5192146610f59ad5f89429abf743e Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 17 Jul 2023 21:42:07 +0200 Subject: [PATCH 64/69] fix groupby test --- xarray/core/coordinates.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 9033bb2a6a4..b583e67b460 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -507,10 +507,10 @@ def _ipython_key_completions_(self): return self._data._ipython_key_completions_() def copy( - self: T_Coordinates, + self, deep: bool = False, memo: dict[int, Any] | None = None, - ) -> T_Coordinates: + ) -> Coordinates: """Return a copy of this Coordinates object.""" # do not copy indexes (may corrupt multi-coordinate indexes) # TODO: disable variables deepcopy? 
it may also be problematic when they @@ -518,7 +518,7 @@ def copy( variables = { k: v._copy(deep=deep, memo=memo) for k, v in self.variables.items() } - return type(self)._construct_direct( + return Coordinates._construct_direct( coords=variables, indexes=dict(self.xindexes), dims=dict(self.sizes) ) From 7ed62790c167de9b875c5ab2b2058df9d38e4d8c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Jul 2023 11:18:17 +0200 Subject: [PATCH 65/69] doc: remove merge_coords in api reference --- doc/api-hidden.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index f2d99cfe16f..42b572e998a 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -23,7 +23,6 @@ Coordinates.to_index Coordinates.update Coordinates.merge - Coordinates.merge_coords Coordinates.copy Coordinates.equals Coordinates.identical @@ -41,7 +40,6 @@ core.coordinates.DatasetCoordinates.to_index core.coordinates.DatasetCoordinates.update core.coordinates.DatasetCoordinates.merge - core.coordinates.DatasetCoordinates.merge_coords core.coordinates.DataArrayCoordinates.copy core.coordinates.DatasetCoordinates.equals core.coordinates.DatasetCoordinates.identical @@ -82,7 +80,6 @@ core.coordinates.DataArrayCoordinates.to_index core.coordinates.DataArrayCoordinates.update core.coordinates.DataArrayCoordinates.merge - core.coordinates.DataArrayCoordinates.merge_coords core.coordinates.DataArrayCoordinates.copy core.coordinates.DataArrayCoordinates.equals core.coordinates.DataArrayCoordinates.identical From 3d9435781b1be42d77be2227c0ba4cde52cb1c3f Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Jul 2023 13:02:31 +0200 Subject: [PATCH 66/69] doc: improve docstrings and glossary --- doc/user-guide/terminology.rst | 69 ++++++++++++++++++++++------------ xarray/core/coordinates.py | 8 ++-- xarray/core/dataarray.py | 6 ++- xarray/core/dataset.py | 37 ++++++++++-------- 4 files changed, 76 insertions(+), 44 deletions(-) diff --git 
a/doc/user-guide/terminology.rst b/doc/user-guide/terminology.rst index 24e6ab69927..b313eff653f 100644 --- a/doc/user-guide/terminology.rst +++ b/doc/user-guide/terminology.rst @@ -54,23 +54,22 @@ complete examples, please consult the relevant documentation.* Coordinate An array that labels a dimension or set of dimensions of another ``DataArray``. In the usual one-dimensional case, the coordinate array's - values can loosely be thought of as tick labels along a dimension. There - are two types of coordinate arrays: *dimension coordinates* and - *non-dimension coordinates* (see below). A coordinate named ``x`` can be - retrieved from ``arr.coords[x]``. A ``DataArray`` can have more - coordinates than dimensions because a single dimension can be labeled by - multiple coordinate arrays. However, only one coordinate array can be a - assigned as a particular dimension's dimension coordinate array. As a + values can loosely be thought of as tick labels along a dimension. We + distinguish :term:`Dimension coordinate` vs. :term:`Non-dimension + coordinate` and :term:`Indexed coordinate` vs. :term:`Non-indexed + coordinate`. A coordinate named ``x`` can be retrieved from + ``arr.coords[x]``. A ``DataArray`` can have more coordinates than + dimensions because a single dimension can be labeled by multiple + coordinate arrays. However, only one coordinate array can be assigned + as a particular dimension's dimension coordinate array. As a consequence, ``len(arr.dims) <= len(arr.coords)`` in general. Dimension coordinate A one-dimensional coordinate array assigned to ``arr`` with both a name - and dimension name in ``arr.dims``. Dimension coordinates are used for - label-based indexing and alignment, like the index found on a - :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. In fact, - dimension coordinates use :py:class:`pandas.Index` objects under the - hood for efficient computation.
Dimension coordinates are marked by - ``*`` when printing a ``DataArray`` or ``Dataset``. + and dimension name in ``arr.dims``. Usually (but not always), a + dimension coordinate is also an :term:`Indexed coordinate` so that it can + be used for label-based indexing and alignment, like the index found on + a :py:class:`pandas.DataFrame` or :py:class:`pandas.Series`. Non-dimension coordinate A coordinate array assigned to ``arr`` with a name in ``arr.coords`` but @@ -79,20 +78,40 @@ complete examples, please consult the relevant documentation.* example, multidimensional coordinates are often used in geoscience datasets when :doc:`the data's physical coordinates (such as latitude and longitude) differ from their logical coordinates - <../examples/multidimensional-coords>`. However, non-dimension coordinates - are not indexed, and any operation on non-dimension coordinates that - leverages indexing will fail. Printing ``arr.coords`` will print all of - ``arr``'s coordinate names, with the corresponding dimension(s) in - parentheses. For example, ``coord_name (dim_name) 1 2 3 ...``. + <../examples/multidimensional-coords>`. Printing ``arr.coords`` will + print all of ``arr``'s coordinate names, with the corresponding + dimension(s) in parentheses. For example, ``coord_name (dim_name) 1 2 3 + ...``. + + Indexed coordinate + A coordinate which has an associated :term:`Index`. Generally this means + that the coordinate labels can be used for indexing (selection) and/or + alignment. An indexed coordinate may have one or more arbitrary + dimensions although in most cases it is also a :term:`Dimension + coordinate`. It may or may not be grouped with other indexed coordinates + depending on whether they share the same index. Indexed coordinates are + marked by ``*`` when printing a ``DataArray`` or ``Dataset``. + + Non-indexed coordinate + A coordinate which has no associated :term:`Index`. 
It may still + represent fixed labels along one or more dimensions but it cannot be + used for label-based indexing and alignment. Index - An *index* is a data structure optimized for efficient selecting and - slicing of an associated array. Xarray creates indexes for dimension - coordinates so that operations along dimensions are fast, while - non-dimension coordinates are not indexed. Under the hood, indexes are - implemented as :py:class:`pandas.Index` objects. The index associated - with dimension name ``x`` can be retrieved by ``arr.indexes[x]``. By - construction, ``len(arr.dims) == len(arr.indexes)`` + An *index* is a data structure optimized for efficient data selection + and alignment within a discrete or continuous space that is defined by + coordinate labels (unless it is a functional index). By default, Xarray + creates a :py:class:`~xarray.indexes.PandasIndex` object (i.e., a + :py:class:`pandas.Index` wrapper) for each :term:`Dimension coordinate`. + For more advanced use cases (e.g., staggered or irregular grids, + geospatial indexes), Xarray also accepts any instance of a specialized + :py:class:`~xarray.indexes.Index` subclass that is associated to one or + more arbitrary coordinates. The index associated with the coordinate + ``x`` can be retrieved by ``arr.xindexes[x]`` (or ``arr.indexes["x"]`` + if the index is convertible to a :py:class:`pandas.Index` object). If + two coordinates ``x`` and ``y`` share the same index, + ``arr.xindexes[x]`` and ``arr.xindexes[y]`` both return the same + :py:class:`~xarray.indexes.Index` object. 
name The names of dimensions, coordinates, DataArray objects and data diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index b583e67b460..e9363e8feb5 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -203,10 +203,12 @@ class Coordinates(AbstractCoordinates): Parameters ---------- coords: dict-like - Mapping of coordinate names to any objects that can be converted - into a :py:class:`Variable`. + Mapping where keys are coordinate names and values are objects that + can be converted into a :py:class:`~xarray.Variable` object + (see :py:func:`~xarray.as_variable`). indexes: dict-like - Mapping of coordinate names to :py:class:`~indexes.Index` objects. + Mapping where keys are coordinate names and values are + :py:class:`~xarray.indexes.Index` objects. """ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2d3694cad75..ad165017a85 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -279,7 +279,7 @@ class DataArray( or pandas object, attempts are made to use this array's metadata to fill in other unspecified arguments. A view of the array's data is used instead of a copy if possible. - coords : sequence or dict of array_like, optional + coords : sequence or dict of array_like or :py:class:`~xarray.Coordinates`, optional Coordinates (tick labels) to use for indexing along each dimension. The following notations are accepted: @@ -299,6 +299,10 @@ class DataArray( - mapping {coord name: (dimension name, array-like)} - mapping {coord name: (tuple of dimension names, array-like)} + Alternatively, a :py:class:`~xarray.Coordinates` object may be used in + order to explicitly pass indexes (e.g., a multi-index or any custom + Xarray index) or to bypass the creation of a default index for any + :term:`Dimension coordinate` included in that object. dims : Hashable or sequence of Hashable, optional Name(s) of the data dimension(s).
Must be either a Hashable (only for 1D data) or a sequence of Hashables with length equal diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d1a6702b646..2ee9c1d17b7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -516,10 +516,10 @@ class Dataset( names and values given by DataArray objects for each variable name. By default, pandas indexes are created for one dimensional variables with - name equal to their dimension so those variables can be used as coordinates - for label based indexing. When a :py:class:`~xarray.Coordinates` object is - passed to ``coords``, any existing index(es) built from those coordinates - will be added to the Dataset. + name equal to their dimension (i.e., :term:`Dimension coordinate`) so those + variables can be readily used as coordinates for label based indexing. When a + :py:class:`~xarray.Coordinates` object is passed to ``coords``, any existing + index(es) built from those coordinates will be added to the Dataset. To load data from a file or file-like object, use the `open_dataset` function. @@ -540,22 +540,21 @@ class Dataset( - mapping {var name: (dimension name, array-like)} - mapping {var name: (tuple of dimension names, array-like)} - mapping {dimension name: array-like} - (it will be automatically moved to coords, see below) + (if array-like is not a scalar it will be automatically moved to coords, + see below) Each dimension must have the same length in all variables in which it appears. - coords : dict-like, optional - Another mapping in similar form as the `data_vars` argument, - except the each item is saved on the dataset as a "coordinate". + coords : :py:class:`~xarray.Coordinates` or dict-like, optional + A :py:class:`~xarray.Coordinates` object or another mapping in + similar form as the `data_vars` argument, except that each item + is saved on the dataset as a "coordinate". 
These variables have an associated meaning: they describe constant/fixed/independent quantities, unlike the varying/measured/dependent quantities that belong in - `variables`. Coordinates values may be given by 1-dimensional - arrays or scalars, in which case `dims` do not need to be - supplied: by default 1D arrays will be assumed to give index - values along the dimension with the same name. + `variables`. - The following notations are accepted: + The following notations are accepted for arbitrary mappings: - mapping {coord name: DataArray} - mapping {coord name: Variable} @@ -565,8 +564,16 @@ class Dataset( (the dimension name is implicitly set to be the same as the coord name) - The last notation implies that the coord name is the same as - the dimension name. + The last notation implies either that the coordinate value is a scalar + or that it is a 1-dimensional array and the coord name is the same as + the dimension name (i.e., a :term:`Dimension coordinate`). In the latter + case, the 1-dimensional array will be assumed to give index values + along the dimension with the same name. + + Alternatively, a :py:class:`~xarray.Coordinates` object may be used in + order to explicitly pass indexes (e.g., a multi-index or any custom + Xarray index) or to bypass the creation of a default index for any + :term:`Dimension coordinate` included in that object. attrs : dict-like, optional Global attributes to save on this dataset. 
From 4a6e915d5e960b270a77ee9e8020d62f2675dad0 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Jul 2023 13:20:32 +0200 Subject: [PATCH 67/69] use Self type annotation in Coordinate class --- xarray/core/coordinates.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index e9363e8feb5..e6727eb8d2d 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -23,7 +23,7 @@ create_default_index_implicit, ) from xarray.core.merge import merge_coordinates_without_align, merge_coords -from xarray.core.types import T_Coordinates, T_DataArray +from xarray.core.types import T_DataArray from xarray.core.utils import Frozen, ReprObject from xarray.core.variable import Variable, as_variable, calculate_dimensions @@ -31,6 +31,7 @@ from xarray.core.common import DataWithCoords from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset + from xarray.core.types import Self # Used as the key corresponding to a DataArray's variable when converting # arbitrary DataArray objects to datasets @@ -260,11 +261,11 @@ def __init__( @classmethod def _construct_direct( - cls: type[T_Coordinates], + cls, coords: dict[Any, Variable], indexes: dict[Any, Index], dims: dict[Any, int] | None = None, - ) -> T_Coordinates: + ) -> Self: from xarray.core.dataset import Dataset obj = object.__new__(cls) @@ -277,9 +278,7 @@ def _construct_direct( return obj @classmethod - def from_pandas_multiindex( - cls: type[T_Coordinates], midx: pd.MultiIndex, dim: str - ) -> T_Coordinates: + def from_pandas_multiindex(cls, midx: pd.MultiIndex, dim: str) -> Self: """Wrap a pandas multi-index as Xarray coordinates (dimension + levels). 
The returned coordinates can be directly assigned to a @@ -475,15 +474,21 @@ def update(self, other: Mapping[Any, Any]) -> None: self._update_coords(coords, indexes) def _overwrite_indexes( - self: T_Coordinates, + self, indexes: Mapping[Any, Index], variables: Mapping[Any, Variable] | None = None, - ) -> T_Coordinates: + ) -> Self: results = self.to_dataset()._overwrite_indexes(indexes, variables) - return cast(T_Coordinates, results.coords) + + # cast ``DatasetCoordinates`` as ``Coordinates`` + # TODO: not correct with + # ``results = align(dataset.coords, dataarray.coords, join='override')`` + # but lets assume that those are edge cases until we get rid of DatasetCoordinates + # and DataArrayCoordinates (i.e., Dataset and DataArray encapsulate Coordinates)? + return cast(Self, results.coords) def _reindex_callback( - self: T_Coordinates, + self, aligner: Aligner, dim_pos_indexers: dict[Hashable, Any], variables: dict[Hashable, Variable], @@ -491,8 +496,8 @@ def _reindex_callback( fill_value: Any, exclude_dims: frozenset[Hashable], exclude_vars: frozenset[Hashable], - ) -> T_Coordinates: - """Callback called from ``Aligner`` to create a new reindexed Coordinates.""" + ) -> Self: + """Callback called from ``Aligner`` to create a new reindexed Coordinates.""" aligned = self.to_dataset()._reindex_callback( aligner, dim_pos_indexers, @@ -502,7 +507,12 @@ def _reindex_callback( exclude_dims, exclude_vars, ) - return cast(T_Coordinates, aligned.coords) + + # cast ``DatasetCoordinates`` as ``Coordinates`` + # TODO: not correct with ``results = align(dataset.coords, dataarray.coords)`` + # but lets assume that those are edge cases until we get rid of DatasetCoordinates + # and DataArrayCoordinates (i.e., Dataset and DataArray encapsulate Coordinates)? 
+ return cast(Self, aligned.coords) def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" From 31f66b4fee04d162b927239e98bbd097954b601e Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Jul 2023 13:26:56 +0200 Subject: [PATCH 68/69] better comment --- xarray/core/coordinates.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index e6727eb8d2d..2b72ef89739 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -480,11 +480,8 @@ def _overwrite_indexes( ) -> Self: results = self.to_dataset()._overwrite_indexes(indexes, variables) - # cast ``DatasetCoordinates`` as ``Coordinates`` - # TODO: not correct with - # ``results = align(dataset.coords, dataarray.coords, join='override')`` - # but lets assume that those are edge cases until we get rid of DatasetCoordinates - # and DataArrayCoordinates (i.e., Dataset and DataArray encapsulate Coordinates)? + # TODO: remove cast once we get rid of DatasetCoordinates + # and DataArrayCoordinates (i.e., Dataset and DataArray encapsulate Coordinates) return cast(Self, results.coords) def _reindex_callback( @@ -508,10 +505,8 @@ def _reindex_callback( exclude_vars, ) - # cast ``DatasetCoordinates`` as ``Coordinates`` - # TODO: not correct with ``results = align(dataset.coords, dataarray.coords)`` - # but lets assume that those are edge cases until we get rid of DatasetCoordinates - # and DataArrayCoordinates (i.e., Dataset and DataArray encapsulate Coordinates)? 
+ # TODO: remove cast once we get rid of DatasetCoordinates + # and DataArrayCoordinates (i.e., Dataset and DataArray encapsulate Coordinates) return cast(Self, aligned.coords) def _ipython_key_completions_(self): From 4cb70d08c418727492cbfc84c123fdb80264b4cc Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Jul 2023 13:39:15 +0200 Subject: [PATCH 69/69] fix Self undefined error with python < 3.11 Pyright displays an info message "Self is not valid in this context" but most important this should avoid runtime errors with python < 3.11. --- xarray/core/coordinates.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 2b72ef89739..9ae1024b374 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -23,7 +23,7 @@ create_default_index_implicit, ) from xarray.core.merge import merge_coordinates_without_align, merge_coords -from xarray.core.types import T_DataArray +from xarray.core.types import Self, T_DataArray from xarray.core.utils import Frozen, ReprObject from xarray.core.variable import Variable, as_variable, calculate_dimensions @@ -31,7 +31,6 @@ from xarray.core.common import DataWithCoords from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset - from xarray.core.types import Self # Used as the key corresponding to a DataArray's variable when converting # arbitrary DataArray objects to datasets