diff --git a/doc/api.rst b/doc/api.rst index 27f5d05d41c..74c0831f26b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1644,6 +1644,7 @@ Exceptions .. autosummary:: :toctree: generated/ + AlignmentError MergeError SerializationWarning diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 20bbdc7ec69..4427e10d994 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,7 +31,10 @@ New Features `Miguel Jimenez-Urias <https://github.com/Mikejmnez>`_. - Improved support pandas Extension Arrays. (:issue:`9661`, :pull:`9671`) By `Ilan Gold <https://github.com/ilan-gold>`_. - +- Improved checks and errors raised when trying to align objects with conflicting indexes. + It is now possible to align objects that each have multiple indexes sharing common dimension(s). + (:issue:`7695`, :pull:`10251`) + By `Benoit Bovy <https://github.com/benbovy>`_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/__init__.py b/xarray/__init__.py index 07e6fe5b207..b08729f7478 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -50,7 +50,7 @@ ) from xarray.core.variable import IndexVariable, Variable, as_variable from xarray.namedarray.core import NamedArray -from xarray.structure.alignment import align, broadcast +from xarray.structure.alignment import AlignmentError, align, broadcast from xarray.structure.chunks import unify_chunks from xarray.structure.combine import combine_by_coords, combine_nested from xarray.structure.concat import concat @@ -128,6 +128,7 @@ "NamedArray", "Variable", # Exceptions + "AlignmentError", "InvalidTreeError", "MergeError", "NotFoundInTreeError", diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index a3c26a0d023..ea90519143c 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -35,6 +35,10 @@ ) +class AlignmentError(ValueError): + """Error class for alignment failures due to incompatible arguments.""" + + def reindex_variables( variables: Mapping[Any, Variable], dim_pos_indexers: Mapping[Any, Any], @@ -196,7 +200,7 @@ def _normalize_indexes( for k, idx in indexes.items(): if not 
isinstance(idx, Index): if getattr(idx, "dims", (k,)) != (k,): - raise ValueError( + raise AlignmentError( f"Indexer has dimensions {idx.dims} that are different " f"from that to be indexed along '{k}'" ) @@ -227,7 +231,7 @@ def _normalize_indexes( elif exclude_dims: excl_dims_str = ", ".join(str(d) for d in exclude_dims) incl_dims_str = ", ".join(str(d) for d in all_dims - exclude_dims) - raise ValueError( + raise AlignmentError( f"cannot exclude dimension(s) {excl_dims_str} from alignment because " "these are used by an index together with non-excluded dimensions " f"{incl_dims_str}" @@ -268,7 +272,7 @@ def find_matching_indexes(self) -> None: for dim_sizes in all_indexes_dim_sizes.values(): for dim, sizes in dim_sizes.items(): if len(sizes) > 1: - raise ValueError( + raise AlignmentError( "cannot align objects with join='override' with matching indexes " f"along dimension {dim!r} that don't have the same size" ) @@ -283,47 +287,6 @@ def find_matching_unindexed_dims(self) -> None: self.unindexed_dim_sizes = unindexed_dim_sizes - def assert_no_index_conflict(self) -> None: - """Check for uniqueness of both coordinate and dimension names across all sets - of matching indexes. - - We need to make sure that all indexes used for re-indexing or alignment - are fully compatible and do not conflict each other. - - Note: perhaps we could choose less restrictive constraints and instead - check for conflicts among the dimension (position) indexers returned by - `Index.reindex_like()` for each matching pair of object index / aligned - index? 
- (ref: https://github.com/pydata/xarray/issues/1603#issuecomment-442965602) - - """ - matching_keys = set(self.all_indexes) | set(self.indexes) - - coord_count: dict[Hashable, int] = defaultdict(int) - dim_count: dict[Hashable, int] = defaultdict(int) - for coord_names_dims, _ in matching_keys: - dims_set: set[Hashable] = set() - for name, dims in coord_names_dims: - coord_count[name] += 1 - dims_set.update(dims) - for dim in dims_set: - dim_count[dim] += 1 - - for count, msg in [(coord_count, "coordinates"), (dim_count, "dimensions")]: - dup = {k: v for k, v in count.items() if v > 1} - if dup: - items_msg = ", ".join( - f"{k!r} ({v} conflicting indexes)" for k, v in dup.items() - ) - raise ValueError( - "cannot re-index or align objects with conflicting indexes found for " - f"the following {msg}: {items_msg}\n" - "Conflicting indexes may occur when\n" - "- they relate to different sets of coordinate and/or dimension names\n" - "- they don't have the same type\n" - "- they may be used to reindex data along common dimensions" - ) - def _need_reindex(self, dim, cmp_indexes) -> bool: """Whether or not we need to reindex variables for a set of matching indexes. 
@@ -383,11 +346,33 @@ def _get_index_joiner(self, index_cls) -> Callable: def align_indexes(self) -> None: """Compute all aligned indexes and their corresponding coordinate variables.""" - aligned_indexes = {} - aligned_index_vars = {} - reindex = {} - new_indexes = {} - new_index_vars = {} + aligned_indexes: dict[MatchingIndexKey, Index] = {} + aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] = {} + reindex: dict[MatchingIndexKey, bool] = {} + new_indexes: dict[Hashable, Index] = {} + new_index_vars: dict[Hashable, Variable] = {} + + def update_dicts( + key: MatchingIndexKey, + idx: Index, + idx_vars: dict[Hashable, Variable], + need_reindex: bool, + ): + reindex[key] = need_reindex + aligned_indexes[key] = idx + aligned_index_vars[key] = idx_vars + + for name, var in idx_vars.items(): + if name in new_indexes: + other_idx = new_indexes[name] + other_var = new_index_vars[name] + raise AlignmentError( + f"cannot align objects on coordinate {name!r} because of conflicting indexes\n" + f"first index: {idx!r}\nsecond index: {other_idx!r}\n" + f"first variable: {var!r}\nsecond variable: {other_var!r}\n" + ) + new_indexes[name] = idx + new_index_vars[name] = var for key, matching_indexes in self.all_indexes.items(): matching_index_vars = self.all_index_vars[key] @@ -419,7 +404,7 @@ def align_indexes(self) -> None: need_reindex = False if need_reindex: if self.join == "exact": - raise ValueError( + raise AlignmentError( "cannot align objects with join='exact' where " "index/labels/sizes are not equal along " "these coordinates (dimensions): " @@ -437,25 +422,14 @@ def align_indexes(self) -> None: joined_index = matching_indexes[0] joined_index_vars = matching_index_vars[0] - reindex[key] = need_reindex - aligned_indexes[key] = joined_index - aligned_index_vars[key] = joined_index_vars - - for name, var in joined_index_vars.items(): - new_indexes[name] = joined_index - new_index_vars[name] = var + update_dicts(key, joined_index, joined_index_vars, 
need_reindex) # Explicitly provided indexes that are not found in objects to align # may relate to unindexed dimensions so we add them too for key, idx in self.indexes.items(): if key not in aligned_indexes: index_vars = self.index_vars[key] - reindex[key] = False - aligned_indexes[key] = idx - aligned_index_vars[key] = index_vars - for name, var in index_vars.items(): - new_indexes[name] = idx - new_index_vars[name] = var + update_dicts(key, idx, index_vars, False) self.aligned_indexes = aligned_indexes self.aligned_index_vars = aligned_index_vars @@ -474,7 +448,7 @@ def assert_unindexed_dim_sizes_equal(self) -> None: else: add_err_msg = "" if len(sizes) > 1: - raise ValueError( + raise AlignmentError( f"cannot reindex or align along dimension {dim!r} " f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg ) @@ -502,14 +476,25 @@ def _get_dim_pos_indexers( self, matching_indexes: dict[MatchingIndexKey, Index], ) -> dict[Hashable, Any]: - dim_pos_indexers = {} + dim_pos_indexers: dict[Hashable, Any] = {} + dim_index: dict[Hashable, Index] = {} for key, aligned_idx in self.aligned_indexes.items(): obj_idx = matching_indexes.get(key) if obj_idx is not None: if self.reindex[key]: indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) - dim_pos_indexers.update(indexers) + for dim, idxer in indexers.items(): + if dim in dim_pos_indexers and not np.array_equal( + idxer, dim_pos_indexers[dim] + ): + raise AlignmentError( + f"cannot reindex or align along dimension {dim!r} because " + "of conflicting re-indexers returned by multiple indexes\n" + f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n" + ) + dim_pos_indexers[dim] = idxer + dim_index[dim] = obj_idx return dim_pos_indexers @@ -571,7 +556,6 @@ def align(self) -> None: self.find_matching_indexes() self.find_matching_unindexed_dims() - self.assert_no_index_conflict() self.align_indexes() self.assert_unindexed_dim_sizes_equal() @@ -735,7 +719,7 @@ def align( Raises ------ - 
ValueError + AlignmentError If any dimensions without labels on the arguments have different sizes, or a different size than the size of the aligned dimension labels. @@ -853,7 +837,7 @@ def align( >>> a, b = xr.align(x, y, join="exact") Traceback (most recent call last): ... - ValueError: cannot align objects with join='exact' ... + xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' ... >>> a, b = xr.align(x, y, join="override") >>> a diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 8f9835aaaa1..7d773ce0b4b 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -942,7 +942,7 @@ def merge( >>> xr.merge([x, y, z], join="exact") Traceback (most recent call last): ... - ValueError: cannot align objects with join='exact' where ... + xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' where ... Raises ------ diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index ed8c4178ed0..f48e0bb6d00 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -23,6 +23,7 @@ import xarray as xr from xarray import ( + AlignmentError, DataArray, Dataset, IndexVariable, @@ -2543,6 +2544,28 @@ def test_align_indexes(self) -> None: assert_identical(expected_x2, x2) + def test_align_multiple_indexes_common_dim(self) -> None: + a = Dataset(coords={"x": [1, 2], "xb": ("x", [3, 4])}).set_xindex("xb") + b = Dataset(coords={"x": [1], "xb": ("x", [3])}).set_xindex("xb") + + (a2, b2) = align(a, b, join="inner") + assert_identical(a2, b, check_default_indexes=False) + assert_identical(b2, b, check_default_indexes=False) + + c = Dataset(coords={"x": [1, 3], "xb": ("x", [2, 4])}).set_xindex("xb") + + with pytest.raises(AlignmentError, match=".*conflicting re-indexers"): + align(a, c) + + def test_align_conflicting_indexes(self) -> None: + class CustomIndex(PandasIndex): ... 
+ + a = Dataset(coords={"xb": ("x", [3, 4])}).set_xindex("xb") + b = Dataset(coords={"xb": ("x", [3])}).set_xindex("xb", CustomIndex) + + with pytest.raises(AlignmentError, match="cannot align.*conflicting indexes"): + align(a, b) + def test_align_non_unique(self) -> None: x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]}) x1, x2 = align(x, x)