def _ensure_same_types(series, dim):
    """Raise if an object-dtype series holds values of more than one type.

    Parameters
    ----------
    series : pandas.Series
        Values to check. Only inspected when ``series.dtype`` is ``object``;
        any other dtype is accepted without looking at the elements.
    dim : str
        Dimension name, used only to build the error message.

    Raises
    ------
    TypeError
        If the elements of an object-dtype ``series`` have more than one
        distinct Python type.
    """
    if series.dtype == object:
        found = set(series.map(type))
        if len(found) > 1:
            # Sort the names: iteration order of a set of type objects is not
            # stable across interpreter runs, which would otherwise make the
            # error message non-deterministic.
            type_names = ", ".join(sorted(t.__name__ for t in found))
            raise TypeError(
                f"Cannot combine along dimension '{dim}' with mixed types."
                f" Found: {type_names}."
            )
def test_combine_by_coords_raises_for_differing_types():
    """combine_by_coords raises TypeError when coordinate labels mix types."""
    # One dataset is labelled with a ``str`` and the other with ``bytes``;
    # these two types cannot be compared with each other.
    datasets = [
        DataArray([value], dims=["time"], coords=[[label]], name="a").to_dataset()
        for value, label in ((0, "a"), (1, b"b"))
    ]

    expected_msg = "Cannot combine along dimension 'time' with mixed types."
    with raises_regex(TypeError, expected_msg):
        combine_by_coords(datasets)