Skip to content

Commit cf99361

Browse files
committed
Implement load_dataset() and load_dataarray()
BUG: Fixes #2887 by adding @shoyer's solution for load_dataset and load_dataarray, wrappers around open_dataset and open_dataarray which open, load, and close the file and return the Dataset/DataArray. TST: Add tests for sequentially opening and writing to files using the new functions. DOC: Add to whats-new.rst. Also a tiny change to the open_dataset docstring. Update docstrings and check for cache in kwargs. Undeprecate load_dataset. Add to api.rst, fix whats-new.rst typo, raise error instead of warning.
1 parent 9492c2f commit cf99361

File tree

6 files changed

+90
-17
lines changed

6 files changed

+90
-17
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@ Dataset methods
460460
:toctree: generated/
461461

462462
open_dataset
463+
load_dataset
463464
open_mfdataset
464465
open_rasterio
465466
open_zarr
@@ -487,6 +488,7 @@ DataArray methods
487488
:toctree: generated/
488489

489490
open_dataarray
491+
load_dataarray
490492
DataArray.to_dataset
491493
DataArray.to_netcdf
492494
DataArray.to_pandas

doc/whats-new.rst

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ Enhancements
2929
By `James McCreight <https://github.com/jmccreight>`_.
3030
- Clean up Python 2 compatibility in code (:issue:`2950`)
3131
By `Guido Imperiale <https://github.com/crusaderky>`_.
32+
- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
33+
``open_dataset()`` and ``open_dataarray()`` to open, load into memory,
34+
and close files, returning the Dataset or DataArray. These functions are
35+
helpful for avoiding file-lock errors when trying to write to files opened
36+
using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`)
37+
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_.
3238

3339
Bug fixes
3440
~~~~~~~~~
@@ -153,9 +159,9 @@ Other enhancements
153159
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
154160
- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
155161
By `Kevin Squire <https://github.com/kmsquire>`_.
156-
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
157-
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
158-
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
162+
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
163+
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
164+
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
159165
added to remove the original zarr chunk encoding.
160166
By `Lily Wang <https://github.com/lilyminium>`_.
161167

xarray/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .core.options import set_options
1818

1919
from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
20-
save_mfdataset)
20+
save_mfdataset, load_dataset, load_dataarray)
2121
from .backends.rasterio_ import open_rasterio
2222
from .backends.zarr import open_zarr
2323

xarray/backends/api.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,12 +185,64 @@ def _finalize_store(write, store):
185185
store.close()
186186

187187

188+
def load_dataset(filename_or_obj, **kwargs):
189+
"""Open, load into memory, and close a Dataset from a file or file-like
190+
object.
191+
192+
This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs
193+
from `open_dataset` in that it loads the Dataset into memory, closes the
194+
file, and returns the Dataset. In contrast, `open_dataset` keeps the file
195+
handle open and lazy loads its contents. All parameters are passed directly
196+
to `open_dataset`. See that documentation for further details.
197+
198+
Returns
199+
-------
200+
dataset : Dataset
201+
The newly created Dataset.
202+
203+
See Also
204+
--------
205+
open_dataset
206+
"""
207+
if 'cache' in kwargs:
208+
raise TypeError('cache has no effect in this context')
209+
210+
with open_dataset(filename_or_obj, **kwargs) as ds:
211+
return ds.load()
212+
213+
214+
def load_dataarray(filename_or_obj, **kwargs):
215+
"""Open, load into memory, and close a DataArray from a file or file-like
216+
object containing a single data variable.
217+
218+
This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs
219+
from `open_dataarray` in that it loads the DataArray into memory, closes the
220+
file, and returns the DataArray. In contrast, `open_dataarray` keeps the file
221+
handle open and lazy loads its contents. All parameters are passed directly
222+
to `open_dataarray`. See that documentation for further details.
223+
224+
Returns
225+
-------
226+
dataarray : DataArray
227+
The newly created DataArray.
228+
229+
See Also
230+
--------
231+
open_dataarray
232+
"""
233+
if 'cache' in kwargs:
234+
raise TypeError('cache has no effect in this context')
235+
236+
with open_dataarray(filename_or_obj, **kwargs) as da:
237+
return da.load()
238+
239+
188240
def open_dataset(filename_or_obj, group=None, decode_cf=True,
189241
mask_and_scale=None, decode_times=True, autoclose=None,
190242
concat_characters=True, decode_coords=True, engine=None,
191243
chunks=None, lock=None, cache=None, drop_variables=None,
192244
backend_kwargs=None, use_cftime=None):
193-
"""Load and decode a dataset from a file or file-like object.
245+
"""Open and decode a dataset from a file or file-like object.
194246
195247
Parameters
196248
----------
@@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
406458
concat_characters=True, decode_coords=True, engine=None,
407459
chunks=None, lock=None, cache=None, drop_variables=None,
408460
backend_kwargs=None, use_cftime=None):
409-
"""Open an DataArray from a netCDF file containing a single data variable.
461+
"""Open a DataArray from a file or file-like object containing a single
462+
data variable.
410463
411464
This is designed to read netCDF files with only one data variable. If
412465
multiple variables are present then a ValueError is raised.

xarray/tests/test_backends.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import xarray as xr
2020
from xarray import (
2121
DataArray, Dataset, backends, open_dataarray, open_dataset, open_mfdataset,
22-
save_mfdataset)
22+
save_mfdataset, load_dataset, load_dataarray)
2323
from xarray.backends.common import robust_getitem
2424
from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
2525
from xarray.backends.pydap_ import PydapDataStore
@@ -2641,6 +2641,23 @@ def test_save_mfdataset_compute_false_roundtrip(self):
26412641
with open_mfdataset([tmp1, tmp2]) as actual:
26422642
assert_identical(actual, original)
26432643

2644+
def test_load_dataset(self):
2645+
with create_tmp_file() as tmp:
2646+
original = Dataset({'foo': ('x', np.random.randn(10))})
2647+
original.to_netcdf(tmp)
2648+
ds = load_dataset(tmp)
2649+
# this would fail if we used open_dataset instead of load_dataset
2650+
ds.to_netcdf(tmp)
2651+
2652+
def test_load_dataarray(self):
2653+
with create_tmp_file() as tmp:
2654+
original = Dataset({'foo': ('x', np.random.randn(10))})
2655+
original.to_netcdf(tmp)
2656+
ds = load_dataarray(tmp)
2657+
# this would fail if we used open_dataarray instead of
2658+
# load_dataarray
2659+
ds.to_netcdf(tmp)
2660+
26442661

26452662
@requires_scipy_or_netCDF4
26462663
@requires_pydap

xarray/tutorial.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir,
2727
github_url='https://github.com/pydata/xarray-data',
2828
branch='master', **kws):
2929
"""
30-
Load a dataset from the online repository (requires internet).
30+
Open a dataset from the online repository (requires internet).
3131
3232
If a local copy is found then always use that to avoid network traffic.
3333
@@ -91,17 +91,12 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir,
9191

9292
def load_dataset(*args, **kwargs):
9393
"""
94-
`load_dataset` will be removed a future version of xarray. The current
95-
behavior of this function can be achived by using
96-
`tutorial.open_dataset(...).load()`.
94+
Open, load into memory, and close a dataset from the online repository
95+
(requires internet).
9796
9897
See Also
9998
--------
10099
open_dataset
101100
"""
102-
warnings.warn(
103-
"load_dataset` will be removed in a future version of xarray. The "
104-
"current behavior of this function can be achived by using "
105-
"`tutorial.open_dataset(...).load()`.",
106-
DeprecationWarning, stacklevel=2)
107-
return open_dataset(*args, **kwargs).load()
101+
with open_dataset(*args, **kwargs) as ds:
102+
return ds.load()

0 commit comments

Comments
 (0)