From 24e4041919db0e2ff704d0b557b984ecf2d61179 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 14 Sep 2020 09:10:55 +0200 Subject: [PATCH 1/8] preserve original dimension, coordinate and variable order in ``concat`` --- xarray/core/concat.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 54bc686a322..025a53f7bdd 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -459,6 +459,9 @@ def ensure_common_dims(vars): combined = concat_vars(vars, dim, positions) assert isinstance(combined, Variable) result_vars[k] = combined + else: + # preserves original variable order + result_vars[k] = result_vars.pop(k) result = Dataset(result_vars, attrs=result_attrs) absent_coord_names = coord_names - set(result.variables) From 264fdb29e7de10fa93d217faf7b7380fa75b2626 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 14 Sep 2020 10:11:55 +0200 Subject: [PATCH 2/8] only re-insert into result_vars if already in --- xarray/core/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 025a53f7bdd..daf26b21ec9 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -459,7 +459,7 @@ def ensure_common_dims(vars): combined = concat_vars(vars, dim, positions) assert isinstance(combined, Variable) result_vars[k] = combined - else: + elif k in result_vars: # preserves original variable order result_vars[k] = result_vars.pop(k) From a3c5db6b42aa3894cdc4e9d74c66de230ce588cb Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 14 Sep 2020 13:56:30 +0200 Subject: [PATCH 3/8] add test to check if dimension and coordinate order is preserved in concat --- xarray/tests/test_concat.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 07ae83d3862..f09d9dc7efa 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -558,3 +558,36 @@ def test_concat_merge_single_non_dim_coord(): for coords in ["different", "all"]: with raises_regex(ValueError, "'y' not present in all datasets"): concat([da1, da2, da3], dim="x") + + +def test_concat_preserve_coordinate_order(): + x = np.arange(0, 5) + y = np.arange(0, 10) + time = [0, 1] + data = np.zeros((2, 10, 5), dtype=bool) + + ds1 = Dataset({"data": (['time', 'y', 'x'], [data[0]])}, + coords={"time": (['time'], [time[0]]), + "y": (['y'], y), + "x": (['x'], x)}) + ds2 = Dataset({"data": (['time', 'y', 'x'], [data[1]])}, + coords={"time": (['time'], [time[1]]), + "y": (['y'], y), + "x": (['x'], x)}) + + expected = Dataset({"data": (['time', 'y', 'x'], data)}, + coords={"time": (['time'], time), + "y": (['y'], y), + "x": (['x'], x)}) + + actual = concat([ds1, ds2], dim='time') + + # check dimension order + for act, exp in zip(actual.dims, expected.dims): + assert act == exp + assert actual.dims[act] == expected.dims[exp] + + # check coordinate order + for act, exp in zip(actual.coords, expected.coords): + assert act == exp + assert_identical(actual.coords[act], expected.coords[exp]) From 6434a649595ce62723c2a5c75779a94980d1e646 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 14 Sep 2020 14:27:04 +0200 Subject: [PATCH 4/8] black style --- xarray/tests/test_concat.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index f09d9dc7efa..038ff2e08c6 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -566,21 +566,21 @@ def test_concat_preserve_coordinate_order(): time = [0, 1] data = np.zeros((2, 10, 5), dtype=bool) - ds1 = Dataset({"data": (['time', 'y', 'x'], [data[0]])}, - coords={"time": (['time'], [time[0]]), - "y": (['y'], y), - "x": (['x'], x)}) - ds2 = Dataset({"data": (['time', 'y', 'x'], [data[1]])}, - coords={"time": (['time'], [time[1]]), - "y": (['y'], y), - "x": (['x'], x)}) - - expected = Dataset({"data": (['time', 'y', 'x'], data)}, - coords={"time": (['time'], time), - "y": (['y'], y), - "x": (['x'], x)}) - - actual = concat([ds1, ds2], dim='time') + ds1 = Dataset( + {"data": (["time", "y", "x"], [data[0]])}, + coords={"time": (["time"], [time[0]]), "y": (["y"], y), "x": (["x"], x)}, + ) + ds2 = Dataset( + {"data": (["time", "y", "x"], [data[1]])}, + coords={"time": (["time"], [time[1]]), "y": (["y"], y), "x": (["x"], x)}, + ) + + expected = Dataset( + {"data": (["time", "y", "x"], data)}, + coords={"time": (["time"], time), "y": (["y"], y), "x": (["x"], x)}, + ) + + actual = concat([ds1, ds2], dim="time") # check dimension order for act, exp in zip(actual.dims, expected.dims): From b3770f42b45c64b452c669a2e06ad2ecae11e383 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 14 Sep 2020 15:31:13 +0000 Subject: [PATCH 5/8] Update xarray/tests/test_concat.py Co-authored-by: keewis --- xarray/tests/test_concat.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 038ff2e08c6..e1c75bd27ea 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -567,17 +567,17 @@ def test_concat_preserve_coordinate_order(): data = np.zeros((2, 10, 5), dtype=bool) ds1 = Dataset( - {"data": (["time", "y", "x"], [data[0]])}, - coords={"time": (["time"], [time[0]]), "y": (["y"], y), "x": (["x"], x)}, + {"data": (["time", "y", "x"], data[0])}, + coords={"time": time[0], "y": y, "x": x}, ) ds2 = Dataset( - {"data": (["time", "y", "x"], [data[1]])}, - coords={"time": (["time"], [time[1]]), "y": (["y"], y), "x": (["x"], x)}, + {"data": (["time", "y", "x"], data[1])}, + coords={"time": time[1], "y": y, "x": x}, ) expected = Dataset( {"data": (["time", "y", "x"], data)}, - coords={"time": (["time"], time), "y": (["y"], y), "x": (["x"], x)}, + coords={"time": time, "y": y, "x": x}, ) actual = concat([ds1, ds2], dim="time") From 1b79b985619ed89c1caa9d875076e5505c6d8661 Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 15 Sep 2020 00:51:03 +0200 Subject: [PATCH 6/8] Update xarray/tests/test_concat.py --- xarray/tests/test_concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index e1c75bd27ea..c3f96a3e17e 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -567,11 +567,11 @@ def test_concat_preserve_coordinate_order(): data = np.zeros((2, 10, 5), dtype=bool) ds1 = Dataset( - {"data": (["time", "y", "x"], data[0])}, + {"data": (["time", "y", "x"], [data[0]])}, coords={"time": time[0], "y": y, "x": x}, ) ds2 = Dataset( - {"data": (["time", "y", "x"], data[1])}, + {"data": (["time", "y", "x"], [data[1]])}, coords={"time": time[1], "y": y, "x": x}, ) From 73d1cf4e1065757529528c28db82b9a1081336b6 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Tue, 15 Sep 2020 07:34:44 +0200 Subject: [PATCH 7/8] add whats-new.rst entry --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 74619529144..d82566083d0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -86,6 +86,8 @@ Bug fixes By `Jens Svensmark `_ - Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`). By `Peter Hausamann `_. +- Preserve dimension and coordinate order during :py:func:`xarray.concat` (:issue:`2811`, :issue:`4072`, :pull:`4419`). + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ From 7f3fc9cd28186cc00731cf194d73d3362e336eb4 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Tue, 15 Sep 2020 07:56:18 +0200 Subject: [PATCH 8/8] fix scalar variable problem in test_concat --- xarray/tests/test_concat.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index c3f96a3e17e..0d5507b6879 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -563,16 +563,16 @@ def test_concat_merge_single_non_dim_coord(): def test_concat_preserve_coordinate_order(): x = np.arange(0, 5) y = np.arange(0, 10) - time = [0, 1] - data = np.zeros((2, 10, 5), dtype=bool) + time = np.arange(0, 4) + data = np.zeros((4, 10, 5), dtype=bool) ds1 = Dataset( - {"data": (["time", "y", "x"], [data[0]])}, - coords={"time": time[0], "y": y, "x": x}, + {"data": (["time", "y", "x"], data[0:2])}, + coords={"time": time[0:2], "y": y, "x": x}, ) ds2 = Dataset( - {"data": (["time", "y", "x"], [data[1]])}, - coords={"time": time[1], "y": y, "x": x}, + {"data": (["time", "y", "x"], data[2:4])}, + coords={"time": time[2:4], "y": y, "x": x}, ) expected = Dataset(