Skip to content

Commit 099c090

Browse files
juseg authored and TomNicholas committed
Add option to choose mfdataset attributes source. (#3498)
* Add 'master_file' kwarg in open_mfdataset, which can be a str or Path to a particular data file.
1 parent ff75081 commit 099c090

File tree

3 files changed

+55
-3
lines changed

3 files changed

+55
-3
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ New Features
3737
- Added the ``count`` reduction method to both :py:class:`~core.rolling.DatasetCoarsen`
3838
and :py:class:`~core.rolling.DataArrayCoarsen` objects. (:pull:`3500`)
3939
By `Deepak Cherian <https://github.com/dcherian>`_
40+
- Add ``attrs_file`` option in :py:func:`~xarray.open_mfdataset` to choose the
41+
source file for global attributes in a multi-file dataset (:issue:`2382`,
42+
:pull:`3498`) by `Julien Seguinot <https://github.com/juseg>`_.
4043
- :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`
4144
now allow swapping to dimension names that don't exist yet. (:pull:`3636`)
4245
By `Justus Magin <https://github.com/keewis>`_.

xarray/backends/api.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ def open_mfdataset(
718718
autoclose=None,
719719
parallel=False,
720720
join="outer",
721+
attrs_file=None,
721722
**kwargs,
722723
):
723724
"""Open multiple files as a single dataset.
@@ -729,8 +730,8 @@ def open_mfdataset(
729730
``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
730731
``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
731732
``combine='nested'`` in future. Requires dask to be installed. See documentation for
732-
details on dask [1]_. Attributes from the first dataset file are used for the
733-
combined dataset.
733+
details on dask [1]_. Global attributes from the ``attrs_file`` are used
734+
for the combined dataset.
734735
735736
Parameters
736737
----------
@@ -827,6 +828,10 @@ def open_mfdataset(
827828
- 'override': if indexes are of same size, rewrite indexes to be
828829
those of the first object with that dimension. Indexes for the same
829830
dimension must have the same size in all objects.
831+
attrs_file : str or pathlib.Path, optional
832+
Path of the file used to read global attributes from.
833+
By default global attributes are read from the first file provided,
834+
with wildcard matches sorted by filename.
830835
**kwargs : optional
831836
Additional arguments passed on to :py:func:`xarray.open_dataset`.
832837
@@ -961,7 +966,15 @@ def open_mfdataset(
961966
raise
962967

963968
combined._file_obj = _MultiFileCloser(file_objs)
964-
combined.attrs = datasets[0].attrs
969+
970+
# read global attributes from the attrs_file or from the first dataset
971+
if attrs_file is not None:
972+
if isinstance(attrs_file, Path):
973+
attrs_file = str(attrs_file)
974+
combined.attrs = datasets[paths.index(attrs_file)].attrs
975+
else:
976+
combined.attrs = datasets[0].attrs
977+
965978
return combined
966979

967980

xarray/tests/test_backends.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,42 @@ def test_attrs_mfdataset(self):
28322832
with raises_regex(AttributeError, "no attribute"):
28332833
actual.test2
28342834

2835+
def test_open_mfdataset_attrs_file(self):
2836+
original = Dataset({"foo": ("x", np.random.randn(10))})
2837+
with create_tmp_files(2) as (tmp1, tmp2):
2838+
ds1 = original.isel(x=slice(5))
2839+
ds2 = original.isel(x=slice(5, 10))
2840+
ds1.attrs["test1"] = "foo"
2841+
ds2.attrs["test2"] = "bar"
2842+
ds1.to_netcdf(tmp1)
2843+
ds2.to_netcdf(tmp2)
2844+
with open_mfdataset(
2845+
[tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2
2846+
) as actual:
2847+
# attributes are inherited from the file passed as attrs_file
2848+
assert actual.attrs["test2"] == ds2.attrs["test2"]
2849+
# attributes from ds1 are not retained, e.g.,
2850+
assert "test1" not in actual.attrs
2851+
2852+
def test_open_mfdataset_attrs_file_path(self):
2853+
original = Dataset({"foo": ("x", np.random.randn(10))})
2854+
with create_tmp_files(2) as (tmp1, tmp2):
2855+
tmp1 = Path(tmp1)
2856+
tmp2 = Path(tmp2)
2857+
ds1 = original.isel(x=slice(5))
2858+
ds2 = original.isel(x=slice(5, 10))
2859+
ds1.attrs["test1"] = "foo"
2860+
ds2.attrs["test2"] = "bar"
2861+
ds1.to_netcdf(tmp1)
2862+
ds2.to_netcdf(tmp2)
2863+
with open_mfdataset(
2864+
[tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2
2865+
) as actual:
2866+
# attributes are inherited from the file passed as attrs_file
2867+
assert actual.attrs["test2"] == ds2.attrs["test2"]
2868+
# attributes from ds1 are not retained, e.g.,
2869+
assert "test1" not in actual.attrs
2870+
28352871
def test_open_mfdataset_auto_combine(self):
28362872
original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)})
28372873
with create_tmp_file() as tmp1:

0 commit comments

Comments
 (0)