Preserve dimension and coordinate order during xarray.concat

Summary of this patch:
  * doc/whats-new.rst: adds a "Bug fixes" entry for the change
    (issues 2811 and 4072, pull 4419).
  * xarray/core/concat.py: adds an `elif k in result_vars` branch that pops
    and re-inserts result_vars[k]; per the in-code comment this preserves
    the original variable order.
  * xarray/tests/test_concat.py: adds test_concat_preserve_coordinate_order,
    which concatenates two Datasets along "time" and checks that the
    dimensions and coordinates of the result iterate in the same order as
    those of a directly constructed expected Dataset.

NOTE(review): line breaks and indentation were reconstructed from the diff
headers, hunk line counts and Python syntax; verify against upstream before
applying. The author hyperlink targets in whats-new.rst are missing in this
copy and were left untouched.

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4b451fcbc18..78e49f711e1 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -86,6 +86,8 @@ Bug fixes
   By `Jens Svensmark `_
 - Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`).
   By `Peter Hausamann `_.
+- Preserve dimension and coordinate order during :py:func:`xarray.concat` (:issue:`2811`, :issue:`4072`, :pull:`4419`).
+  By `Kai Mühlbauer `_.
 - Avoid relying on :py:class:`set` objects for the ordering of the coordinates (:pull:`4409`)
   By `Justus Magin `_.
 - Fix indexing with datetime64 scalars with pandas 1.1 (:issue:`4283`).
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
index 0955a95fa8b..3a39369e793 100644
--- a/xarray/core/concat.py
+++ b/xarray/core/concat.py
@@ -463,6 +463,9 @@ def ensure_common_dims(vars):
             combined = concat_vars(vars, dim, positions)
             assert isinstance(combined, Variable)
             result_vars[k] = combined
+        elif k in result_vars:
+            # preserves original variable order
+            result_vars[k] = result_vars.pop(k)
 
     result = Dataset(result_vars, attrs=result_attrs)
     absent_coord_names = coord_names - set(result.variables)
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index 07ae83d3862..0d5507b6879 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -558,3 +558,36 @@ def test_concat_merge_single_non_dim_coord():
     for coords in ["different", "all"]:
         with raises_regex(ValueError, "'y' not present in all datasets"):
             concat([da1, da2, da3], dim="x")
+
+
+def test_concat_preserve_coordinate_order():
+    x = np.arange(0, 5)
+    y = np.arange(0, 10)
+    time = np.arange(0, 4)
+    data = np.zeros((4, 10, 5), dtype=bool)
+
+    ds1 = Dataset(
+        {"data": (["time", "y", "x"], data[0:2])},
+        coords={"time": time[0:2], "y": y, "x": x},
+    )
+    ds2 = Dataset(
+        {"data": (["time", "y", "x"], data[2:4])},
+        coords={"time": time[2:4], "y": y, "x": x},
+    )
+
+    expected = Dataset(
+        {"data": (["time", "y", "x"], data)},
+        coords={"time": time, "y": y, "x": x},
+    )
+
+    actual = concat([ds1, ds2], dim="time")
+
+    # check dimension order
+    for act, exp in zip(actual.dims, expected.dims):
+        assert act == exp
+        assert actual.dims[act] == expected.dims[exp]
+
+    # check coordinate order
+    for act, exp in zip(actual.coords, expected.coords):
+        assert act == exp
+        assert_identical(actual.coords[act], expected.coords[exp])