diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index e5492ec73a4..301fc53e0fa 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -809,3 +809,28 @@ backends.DummyFileManager.acquire backends.DummyFileManager.acquire_context backends.DummyFileManager.close + + backends.common.BackendArray + backends.common.BackendEntrypoint + backends.common.BackendEntrypoint.guess_can_open + backends.common.BackendEntrypoint.open_dataset + + core.indexing.IndexingSupport + core.indexing.explicit_indexing_adapter + core.indexing.BasicIndexer + core.indexing.OuterIndexer + core.indexing.VectorizedIndexer + core.indexing.LazilyIndexedArray + core.indexing.LazilyVectorizedIndexedArray + + conventions.decode_cf_variables + + coding.variables.UnsignedIntegerCoder + coding.variables.CFMaskCoder + coding.variables.CFScaleOffsetCoder + + coding.strings.CharacterArrayCoder + coding.strings.EncodedStringCoder + + coding.times.CFTimedeltaCoder + coding.times.CFDatetimeCoder diff --git a/doc/internals.rst b/doc/internals.rst index 60d32128c60..a461a12ec2e 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -231,3 +231,459 @@ re-open it directly with Zarr: zgroup = zarr.open("rasm.zarr") print(zgroup.tree()) dict(zgroup["Tair"].attrs) + + +How to add a new backend +------------------------ + +Adding a new backend for read support to Xarray does not require +to integrate any code in Xarray; all you need to do is: + +- Create a class that inherits from Xarray :py:class:`~xarray.backends.common.BackendEntrypoint` + and implements the method ``open_dataset`` see :ref:`RST backend_entrypoint` + +- Declare this class as an external plugin in your ``setup.py``, see :ref:`RST backend_registration` + +If you also want to support lazy loading and dask see :ref:`RST lazy_loading`. + +Note that the new interface for backends is available from Xarray +version >= 0.18 onwards. + +.. _RST backend_entrypoint: + +BackendEntrypoint subclassing ++++++++++++++++++++++++++++++ + +Your ``BackendEntrypoint`` sub-class is the primary interface with Xarray, and +it should implement the following attributes and methods: + +- the ``open_dataset`` method (mandatory) +- the ``open_dataset_parameters`` attribute (optional) +- the ``guess_can_open`` method (optional). + +This is what a ``BackendEntrypoint`` subclass should look like: + +.. code-block:: python + + class MyBackendEntrypoint(BackendEntrypoint): + def open_dataset( + self, + filename_or_obj, + *, + drop_variables=None, + # other backend specific keyword arguments + ): + ... + return ds + + open_dataset_parameters = ["filename_or_obj", "drop_variables"] + + def guess_can_open(self, filename_or_obj): + try: + _, ext = os.path.splitext(filename_or_obj) + except TypeError: + return False + return ext in {...} + +``BackendEntrypoint`` subclass methods and attributes are detailed in the following. + +.. _RST open_dataset: + +open_dataset +^^^^^^^^^^^^ + +The backend ``open_dataset`` shall implement reading from file, the variables +decoding and it shall instantiate the output Xarray class :py:class:`~xarray.Dataset`. + +The following is an example of the high level processing steps: + +.. code-block:: python + + def open_dataset( + self, + filename_or_obj, + *, + drop_variables=None, + decode_times=True, + decode_timedelta=True, + decode_coords=True, + my_backend_param=None, + ): + vars, attrs, coords = my_reader( + filename_or_obj, + drop_variables=drop_variables, + my_backend_param=my_backend_param, + ) + vars, attrs, coords = my_decode_variables( + vars, attrs, decode_times, decode_timedelta, decode_coords + ) # see also conventions.decode_cf_variables + + ds = xr.Dataset(vars, attrs=attrs) + ds = ds.set_coords(coords) + ds.set_close(store.close) + + return ds + + +The output :py:class:`~xarray.Dataset` shall implement the additional custom method +``close``, used by Xarray to ensure the related files are eventually closed. This +method shall be set by using :py:meth:`~xarray.Dataset.set_close`. + + +The input of ``open_dataset`` method are one argument +(``filename``) and one keyword argument (``drop_variables``): + +- ``filename``: can be a string containing a path or an instance of + :py:class:`pathlib.Path`. +- ``drop_variables``: can be `None` or an iterable containing the variable + names to be dropped when reading the data. + +If it makes sense for your backend, your ``open_dataset`` method +should implement in its interface the following boolean keyword arguments, called +**decoders**, which default to ``None``: + +- ``mask_and_scale`` +- ``decode_times`` +- ``decode_timedelta`` +- ``use_cftime`` +- ``concat_characters`` +- ``decode_coords`` + +Note: all the supported decoders shall be declared explicitly +in backend ``open_dataset`` signature. + +These keyword arguments are explicitly defined in Xarray +:py:func:`~xarray.open_dataset` signature. Xarray will pass them to the +backend only if the User explicitly sets a value different from ``None``. +For more details on decoders see :ref:`RST decoders`. + +Your backend can also take as input a set of backend-specific keyword +arguments. All these keyword arguments can be passed to +:py:func:`~xarray.open_dataset` grouped either via the ``backend_kwargs`` +parameter or explicitly using the syntax ``**kwargs``. + + +If you don't want to support the lazy loading, then the +:py:class:`~xarray.Dataset` shall contain values as a :py:class:`numpy.ndarray` +and your work is almost done. + +.. _RST open_dataset_parameters: + +open_dataset_parameters +^^^^^^^^^^^^^^^^^^^^^^^ + +``open_dataset_parameters`` is the list of backend ``open_dataset`` parameters. +It is not a mandatory parameter, and if the backend does not provide it +explicitly, Xarray creates a list of them automatically by inspecting the +backend signature. + +If ``open_dataset_parameters`` is not defined, but ``**kwargs`` and ``*args`` +are in the backend ``open_dataset`` signature, Xarray raises an error. +On the other hand, if the backend provides the ``open_dataset_parameters``, +then ``**kwargs`` and ``*args`` can be used in the signature. +However, this practice is discouraged unless there is a good reasons for using +``**kwargs`` or ``*args``. + +.. _RST guess_can_open: + +guess_can_open +^^^^^^^^^^^^^^ + +``guess_can_open`` is used to identify the proper engine to open your data +file automatically in case the engine is not specified explicitly. If you are +not interested in supporting this feature, you can skip this step since +:py:class:`~xarray.backends.common.BackendEntrypoint` already provides a +default :py:meth:`~xarray.backend.common.BackendEntrypoint.guess_can_open` +that always returns ``False``. + +Backend ``guess_can_open`` takes as input the ``filename_or_obj`` parameter of +Xarray :py:meth:`~xarray.open_dataset`, and returns a boolean. + +.. _RST decoders: + +Decoders +^^^^^^^^ +The decoders implement specific operations to transform data from on-disk +representation to Xarray representation. + +A classic example is the “time” variable decoding operation. In NetCDF, the +elements of the “time” variable are stored as integers, and the unit contains +an origin (for example: "seconds since 1970-1-1"). In this case, Xarray +transforms the pair integer-unit in a :py:class:`numpy.datetime64`. + +The standard coders implemented in Xarray are: + +- :py:class:`xarray.coding.strings.CharacterArrayCoder()` +- :py:class:`xarray.coding.strings.EncodedStringCoder()` +- :py:class:`xarray.coding.variables.UnsignedIntegerCoder()` +- :py:class:`xarray.coding.variables.CFMaskCoder()` +- :py:class:`xarray.coding.variables.CFScaleOffsetCoder()` +- :py:class:`xarray.coding.times.CFTimedeltaCoder()` +- :py:class:`xarray.coding.times.CFDatetimeCoder()` + +Xarray coders all have the same interface. They have two methods: ``decode`` +and ``encode``. The method ``decode`` takes a ``Variable`` in on-disk +format and returns a ``Variable`` in Xarray format. Variable +attributes no more applicable after the decoding, are dropped and stored in the +``Variable.encoding`` to make them available to the ``encode`` method, which +performs the inverse transformation. + +In the following an example on how to use the coders ``decode`` method: + +.. ipython:: python + + var = xr.Variable( + dims=("x",), data=np.arange(10.0), attrs={"scale_factor": 10, "add_offset": 2} + ) + var + + coder = xr.coding.variables.CFScaleOffsetCoder() + decoded_var = coder.decode(var) + decoded_var + decoded_var.encoding + +Some of the transformations can be common to more backends, so before +implementing a new decoder, be sure Xarray does not already implement that one. + +The backends can reuse Xarray’s decoders, either instantiating the coders +and using the method ``decode`` directly or using the higher-level function +:py:func:`~xarray.conventions.decode_cf_variables` that groups Xarray decoders. + +In some cases, the transformation to apply strongly depends on the on-disk +data format. Therefore, you may need to implement your own decoder. + +An example of such a case is when you have to deal with the time format of a +grib file. grib format is very different from the NetCDF one: in grib, the +time is stored in two attributes dataDate and dataTime as strings. Therefore, +it is not possible to reuse the Xarray time decoder, and implementing a new +one is mandatory. + +Decoders can be activated or deactivated using the boolean keywords of +Xarray :py:meth:`~xarray.open_dataset` signature: ``mask_and_scale``, +``decode_times``, ``decode_timedelta``, ``use_cftime``, +``concat_characters``, ``decode_coords``. +Such keywords are passed to the backend only if the User sets a value +different from ``None``. Note that the backend does not necessarily have to +implement all the decoders, but it shall declare in its ``open_dataset`` +interface only the boolean keywords related to the supported decoders. + +.. _RST backend_registration: + +How to register a backend ++++++++++++++++++++++++++++ + +Define a new entrypoint in your ``setup.py`` (or ``setup.cfg``) with: + +- group: ``xarray.backend`` +- name: the name to be passed to :py:meth:`~xarray.open_dataset` as ``engine`` +- object reference: the reference of the class that you have implemented. + +You can declare the entrypoint in ``setup.py`` using the following syntax: + +.. code-block:: + + setuptools.setup( + entry_points={ + "xarray.backends": [ + "engine_name=your_package.your_module:YourBackendEntryClass" + ], + }, + ) + +in ``setup.cfg``: + +.. code-block:: cfg + + [options.entry_points] + xarray.backends = + engine_name = your_package.your_module:YourBackendEntryClass + + +See https://packaging.python.org/specifications/entry-points/#data-model +for more information + +If you are using [Poetry](https://python-poetry.org/) for your build system, you can accomplish the same thing using "plugins". In this case you would need to add the following to your ``pyproject.toml`` file: + +.. code-block:: toml + + [tool.poetry.plugins."xarray_backends"] + "engine_name" = "your_package.your_module:YourBackendEntryClass" + +See https://python-poetry.org/docs/pyproject/#plugins for more information on Poetry plugins. + +.. _RST lazy_loading: + +How to support Lazy Loading ++++++++++++++++++++++++++++ +If you want to make your backend effective with big datasets, then you should +support lazy loading. +Basically, you shall replace the :py:class:`numpy.ndarray` inside the +variables with a custom class that supports lazy loading indexing. +See the example below: + +.. code-block:: python + + backend_array = MyBackendArray() + data = indexing.LazilyIndexedArray(backend_array) + var = xr.Variable(dims, data, attrs=attrs, encoding=encoding) + +Where: + +- :py:class:`~xarray.core.indexing.LazilyIndexedArray` is a class + provided by Xarray that manages the lazy loading. +- ``MyBackendArray`` shall be implemented by the backend and shall inherit + from :py:class:`~xarray.backends.common.BackendArray`. + +BackendArray subclassing +^^^^^^^^^^^^^^^^^^^^^^^^ + +The BackendArray subclass shall implement the following method and attributes: + +- the ``__getitem__`` method that takes in input an index and returns a + `NumPy `__ array +- the ``shape`` attribute +- the ``dtype`` attribute. + + +Xarray supports different type of +`indexing `__, that can be +grouped in three types of indexes +:py:class:`~xarray.core.indexing.BasicIndexer`, +:py:class:`~xarray.core.indexing.OuterIndexer` and +:py:class:`~xarray.core.indexing.VectorizedIndexer`. +This implies that the implementation of the method ``__getitem__`` can be tricky. +In oder to simplify this task, Xarray provides a helper function, +:py:func:`~xarray.core.indexing.explicit_indexing_adapter`, that transforms +all the input ``indexer`` types (`basic`, `outer`, `vectorized`) in a tuple +which is interpreted correctly by your backend. + +This is an example ``BackendArray`` subclass implementation: + +.. code-block:: python + + class MyBackendArray(BackendArray): + def __init__( + self, + shape, + dtype, + lock, + # other backend specific keyword arguments + ): + self.shape = shape + self.dtype = lock + self.lock = dtype + + def __getitem__( + self, key: xarray.core.indexing.ExplicitIndexer + ) -> np.typing.ArrayLike: + return indexing.explicit_indexing_adapter( + key, + self.shape, + indexing.IndexingSupport.BASIC, + self._raw_indexing_method, + ) + + def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike: + # thread safe method that access to data on disk + with self.lock: + ... + return item + +Note that ``BackendArray.__getitem__`` must be thread safe to support +multi-thread processing. + +The :py:func:`~xarray.core.indexing.explicit_indexing_adapter` method takes in +input the ``key``, the array ``shape`` and the following parameters: + +- ``indexing_support``: the type of index supported by ``raw_indexing_method`` +- ``raw_indexing_method``: a method that shall take in input a key in the form + of a tuple and return an indexed :py:class:`numpy.ndarray`. + +For more details see +:py:class:`~xarray.core.indexing.IndexingSupport` and :ref:`RST indexing`. + +In order to support `Dask `__ distributed and +:py:mod:`multiprocessing`, ``BackendArray`` subclass should be serializable +either with :ref:`io.pickle` or +`cloudpickle `__. +That implies that all the reference to open files should be dropped. For +opening files, we therefore suggest to use the helper class provided by Xarray +:py:class:`~xarray.backends.CachingFileManager`. + +.. _RST indexing: + +Indexing Examples +^^^^^^^^^^^^^^^^^ +**BASIC** + +In the ``BASIC`` indexing support, numbers and slices are supported. + +Example: + +.. ipython:: + :verbatim: + + In [1]: # () shall return the full array + ...: backend_array._raw_indexing_method(()) + Out[1]: array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]) + + In [2]: # shall support integers + ...: backend_array._raw_indexing_method(1, 1) + Out[2]: 5 + + In [3]: # shall support slices + ...: backend_array._raw_indexing_method(slice(0, 3), slice(2, 4)) + Out[3]: array([[2, 3], [6, 7], [10, 11]]) + +**OUTER** + +The ``OUTER`` indexing shall support number, slices and in addition it shall +support also lists of integers. The the outer indexing is equivalent to +combining multiple input list with ``itertools.product()``: + +.. ipython:: + :verbatim: + + In [1]: backend_array._raw_indexing_method([0, 1], [0, 1, 2]) + Out[1]: array([[0, 1, 2], [4, 5, 6]]) + + # shall support integers + In [2]: backend_array._raw_indexing_method(1, 1) + Out[2]: 5 + + +**OUTER_1VECTOR** + +The ``OUTER_1VECTOR`` indexing shall supports number, slices and at most one +list. The behaviour with the list shall be the same of ``OUTER`` indexing. + +If you support more complex indexing as `explicit indexing` or +`numpy indexing`, you can have a look to the implemetation of Zarr backend and Scipy backend, +currently available in :py:mod:`~xarray.backends` module. + +.. _RST preferred_chunks: + +Backend preferred chunks +^^^^^^^^^^^^^^^^^^^^^^^^ + +The backend is not directly involved in `Dask `__ +chunking, since it is internally managed by Xarray. However, the backend can +define the preferred chunk size inside the variable’s encoding +``var.encoding["preferred_chunks"]``. The ``preferred_chunks`` may be useful +to improve performances with lazy loading. ``preferred_chunks`` shall be a +dictionary specifying chunk size per dimension like +``{“dim1”: 1000, “dim2”: 2000}`` or +``{“dim1”: [1000, 100], “dim2”: [2000, 2000, 2000]]}``. + +The ``preferred_chunks`` is used by Xarray to define the chunk size in some +special cases: + +- if ``chunks`` along a dimension is ``None`` or not defined +- if ``chunks`` is ``"auto"``. + +In the first case Xarray uses the chunks size specified in +``preferred_chunks``. +In the second case Xarray accommodates ideal chunk sizes, preserving if +possible the "preferred_chunks". The ideal chunk size is computed using +:py:func:`dask.core.normalize_chunks`, setting +``previous_chunks = preferred_chunks``. diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 65c5bc2a02b..7ebbc246f55 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -62,7 +62,7 @@ def open_store_variable(self, name, var): data = var.data else: wrapped_array = CfGribArrayWrapper(self, var.data) - data = indexing.LazilyOuterIndexedArray(wrapped_array) + data = indexing.LazilyIndexedArray(wrapped_array) encoding = self.ds.encoding.copy() encoding["original_shape"] = var.data.shape @@ -87,9 +87,9 @@ def get_encoding(self): class CfgribfBackendEntrypoint(BackendEntrypoint): - def guess_can_open(self, store_spec): + def guess_can_open(self, filename_or_obj): try: - _, ext = os.path.splitext(store_spec) + _, ext = os.path.splitext(filename_or_obj) except TypeError: return False return ext in {".grib", ".grib2", ".grb", ".grb2"} diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e2905d0866b..aa902602278 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,7 +1,7 @@ import logging import time import traceback -from typing import Dict, Tuple, Type, Union +from typing import Any, Dict, Tuple, Type, Union import numpy as np @@ -344,12 +344,41 @@ def encode(self, variables, attributes): class BackendEntrypoint: + """ + ``BackendEntrypoint`` is a class container and it is the main interface + for the backend plugins, see :ref:`RST backend_entrypoint`. + It shall implement: + + - ``open_dataset`` method: it shall implement reading from file, variables + decoding and it returns an instance of :py:class:`~xarray.Dataset`. + It shall take in input at least ``filename_or_obj`` argument and + ``drop_variables`` keyword argument. + For more details see :ref:`RST open_dataset`. + - ``guess_can_open`` method: it shall return ``True`` if the backend is able to open + ``filename_or_obj``, ``False`` otherwise. The implementation of this + method is not mandatory. + """ + open_dataset_parameters: Union[Tuple, None] = None + """list of ``open_dataset`` method parameters""" + + def open_dataset( + self, + filename_or_obj: str, + drop_variables: Tuple[str] = None, + **kwargs: Any, + ): + """ + Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. + """ - def open_dataset(self): raise NotImplementedError - def guess_can_open(self, store_spec): + def guess_can_open(self, filename_or_obj): + """ + Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. + """ + return False diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index aa892c4f89c..532c63089bb 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -182,7 +182,7 @@ def open_store_variable(self, name, var): import h5py dimensions = var.dimensions - data = indexing.LazilyOuterIndexedArray(H5NetCDFArrayWrapper(name, self)) + data = indexing.LazilyIndexedArray(H5NetCDFArrayWrapper(name, self)) attrs = _read_attributes(var) # netCDF4 specific encoding @@ -329,14 +329,14 @@ def close(self, **kwargs): class H5netcdfBackendEntrypoint(BackendEntrypoint): - def guess_can_open(self, store_spec): + def guess_can_open(self, filename_or_obj): try: - return read_magic_number(store_spec).startswith(b"\211HDF\r\n\032\n") + return read_magic_number(filename_or_obj).startswith(b"\211HDF\r\n\032\n") except TypeError: pass try: - _, ext = os.path.splitext(store_spec) + _, ext = os.path.splitext(filename_or_obj) except TypeError: return False diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index e3d87aaf83f..1c82f1975a7 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -388,7 +388,7 @@ def ds(self): def open_store_variable(self, name, var): dimensions = var.dimensions - data = indexing.LazilyOuterIndexedArray(NetCDF4ArrayWrapper(name, self)) + data = indexing.LazilyIndexedArray(NetCDF4ArrayWrapper(name, self)) attributes = {k: var.getncattr(k) for k in var.ncattrs()} _ensure_fill_value_valid(data, attributes) # netCDF4 specific encoding; save _FillValue for later @@ -513,11 +513,11 @@ def close(self, **kwargs): class NetCDF4BackendEntrypoint(BackendEntrypoint): - def guess_can_open(self, store_spec): - if isinstance(store_spec, str) and is_remote_uri(store_spec): + def guess_can_open(self, filename_or_obj): + if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj): return True try: - _, ext = os.path.splitext(store_spec) + _, ext = os.path.splitext(filename_or_obj) except TypeError: return False return ext in {".nc", ".nc4", ".cdf"} diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 80485fce459..3faa42b1b12 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -74,7 +74,7 @@ def ds(self): return self._manager.acquire() def open_store_variable(self, name, var): - data = indexing.LazilyOuterIndexedArray(PncArrayWrapper(name, self)) + data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self)) attrs = {k: getattr(var, k) for k in var.ncattrs()} return Variable(var.dimensions, data, attrs) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 7f8622ca66e..69d93299381 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -92,7 +92,7 @@ def open(cls, url, session=None): return cls(ds) def open_store_variable(self, var): - data = indexing.LazilyOuterIndexedArray(PydapArrayWrapper(var)) + data = indexing.LazilyIndexedArray(PydapArrayWrapper(var)) return Variable(var.dimensions, data, _fix_attributes(var.attributes)) def get_variables(self): @@ -108,8 +108,8 @@ def get_dimensions(self): class PydapBackendEntrypoint(BackendEntrypoint): - def guess_can_open(self, store_spec): - return isinstance(store_spec, str) and is_remote_uri(store_spec) + def guess_can_open(self, filename_or_obj): + return isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj) def open_dataset( self, diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 41c99efd076..dfc0efbd6da 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -74,7 +74,7 @@ def ds(self): return self._manager.acquire() def open_store_variable(self, name, var): - data = indexing.LazilyOuterIndexedArray(NioArrayWrapper(name, self)) + data = indexing.LazilyIndexedArray(NioArrayWrapper(name, self)) return Variable(var.dimensions, data, var.attributes) def get_variables(self): diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index d776b116ea8..51f0599e8e0 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -335,9 +335,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc else: attrs[k] = v - data = indexing.LazilyOuterIndexedArray( - RasterioArrayWrapper(manager, lock, vrt_params) - ) + data = indexing.LazilyIndexedArray(RasterioArrayWrapper(manager, lock, vrt_params)) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index ddc157ed8e4..49215843397 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -233,14 +233,14 @@ def close(self): class ScipyBackendEntrypoint(BackendEntrypoint): - def guess_can_open(self, store_spec): + def guess_can_open(self, filename_or_obj): try: - return read_magic_number(store_spec).startswith(b"CDF") + return read_magic_number(filename_or_obj).startswith(b"CDF") except TypeError: pass try: - _, ext = os.path.splitext(store_spec) + _, ext = os.path.splitext(filename_or_obj) except TypeError: return False return ext in {".nc", ".nc4", ".cdf", ".gz"} diff --git a/xarray/backends/store.py b/xarray/backends/store.py index d57b3ab9df8..860a0254b64 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -4,8 +4,8 @@ class StoreBackendEntrypoint(BackendEntrypoint): - def guess_can_open(self, store_spec): - return isinstance(store_spec, AbstractDataStore) + def guess_can_open(self, filename_or_obj): + return isinstance(filename_or_obj, AbstractDataStore) def open_dataset( self, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 04fdeac6450..ca5c2a51fa4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -326,7 +326,7 @@ def __init__( self._write_region = write_region def open_store_variable(self, name, zarr_array): - data = indexing.LazilyOuterIndexedArray(ZarrArrayWrapper(name, self)) + data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self)) dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY) attributes = dict(attributes) encoding = { diff --git a/xarray/conventions.py b/xarray/conventions.py index 93e765e5622..7b467d3ee2e 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -354,7 +354,7 @@ def decode_cf_variable( data = BoolTypeArray(data) if not is_duck_dask_array(data): - data = indexing.LazilyOuterIndexedArray(data) + data = indexing.LazilyIndexedArray(data) return Variable(dimensions, data, attributes, encoding=encoding) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index dff6d75d5b7..0c180fdc9f7 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -513,7 +513,7 @@ def __getitem__(self, key): return result -class LazilyOuterIndexedArray(ExplicitlyIndexedNDArrayMixin): +class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" __slots__ = ("array", "key") @@ -619,10 +619,10 @@ def _updated_key(self, new_key): return _combine_indexers(self.key, self.shape, new_key) def __getitem__(self, indexer): - # If the indexed array becomes a scalar, return LazilyOuterIndexedArray + # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer.tuple): key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) - return LazilyOuterIndexedArray(self.array, key) + return LazilyIndexedArray(self.array, key) return type(self)(self.array, self._updated_key(indexer)) def transpose(self, order): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 45553eb9b1e..5081f1dbda1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -169,7 +169,7 @@ def _maybe_wrap_data(data): Put pandas.Index and numpy.ndarray arguments in adapter objects to ensure they can be indexed properly. - NumpyArrayAdapter, PandasIndexAdapter and LazilyOuterIndexedArray should + NumpyArrayAdapter, PandasIndexAdapter and LazilyIndexedArray should all pass through unmodified. """ if isinstance(data, pd.Index): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index db47faa8d2b..9bc7a1b8566 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -187,7 +187,7 @@ def get_variables(self): def lazy_inaccessible(k, v): if k in self._indexvars: return v - data = indexing.LazilyOuterIndexedArray(InaccessibleArray(v.values)) + data = indexing.LazilyIndexedArray(InaccessibleArray(v.values)) return Variable(v.dims, data, v.attrs) return {k: lazy_inaccessible(k, v) for k, v in self._variables.items()} diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 4ef7536e1f2..10641ff54e9 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -224,7 +224,7 @@ def test_lazily_indexed_array(self): original = np.random.rand(10, 20, 30) x = indexing.NumpyIndexingAdapter(original) v = Variable(["i", "j", "k"], original) - lazy = indexing.LazilyOuterIndexedArray(x) + lazy = indexing.LazilyIndexedArray(x) v_lazy = Variable(["i", "j", "k"], lazy) arr = ReturnItem() # test orthogonally applied indexers @@ -244,9 +244,7 @@ def test_lazily_indexed_array(self): ]: assert expected.shape == actual.shape assert_array_equal(expected, actual) - assert isinstance( - actual._data, indexing.LazilyOuterIndexedArray - ) + assert isinstance(actual._data, indexing.LazilyIndexedArray) # make sure actual.key is appropriate type if all( @@ -282,18 +280,18 @@ def test_lazily_indexed_array(self): actual._data, ( indexing.LazilyVectorizedIndexedArray, - indexing.LazilyOuterIndexedArray, + indexing.LazilyIndexedArray, ), ) - assert isinstance(actual._data, indexing.LazilyOuterIndexedArray) + assert isinstance(actual._data, indexing.LazilyIndexedArray) assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter) def test_vectorized_lazily_indexed_array(self): original = np.random.rand(10, 20, 30) x = indexing.NumpyIndexingAdapter(original) v_eager = Variable(["i", "j", "k"], x) - lazy = indexing.LazilyOuterIndexedArray(x) + lazy = indexing.LazilyIndexedArray(x) v_lazy = Variable(["i", "j", "k"], lazy) arr = ReturnItem() @@ -306,7 +304,7 @@ def check_indexing(v_eager, v_lazy, indexers): actual._data, ( indexing.LazilyVectorizedIndexedArray, - indexing.LazilyOuterIndexedArray, + indexing.LazilyIndexedArray, ), ) assert_array_equal(expected, actual) @@ -364,19 +362,19 @@ def test_index_scalar(self): class TestMemoryCachedArray: def test_wrapper(self): - original = indexing.LazilyOuterIndexedArray(np.arange(10)) + original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) assert_array_equal(wrapped, np.arange(10)) assert isinstance(wrapped.array, indexing.NumpyIndexingAdapter) def test_sub_array(self): - original = indexing.LazilyOuterIndexedArray(np.arange(10)) + original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) child = wrapped[B[:5]] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) - assert isinstance(wrapped.array, indexing.LazilyOuterIndexedArray) + assert isinstance(wrapped.array, indexing.LazilyIndexedArray) def test_setitem(self): original = np.arange(10) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e1ae3e1f258..90dfaa9c121 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -15,7 +15,7 @@ BasicIndexer, CopyOnWriteArray, DaskIndexingAdapter, - LazilyOuterIndexedArray, + LazilyIndexedArray, MemoryCachedArray, NumpyIndexingAdapter, OuterIndexer, @@ -1095,9 +1095,9 @@ def test_repr(self): assert expected == repr(v) def test_repr_lazy_data(self): - v = Variable("x", LazilyOuterIndexedArray(np.arange(2e5))) + v = Variable("x", LazilyIndexedArray(np.arange(2e5))) assert "200000 values with dtype" in repr(v) - assert isinstance(v._data, LazilyOuterIndexedArray) + assert isinstance(v._data, LazilyIndexedArray) def test_detect_indexer_type(self): """ Tests indexer type was correctly detected. """ @@ -2169,7 +2169,7 @@ def test_coarsen_2d(self): class TestAsCompatibleData: def test_unchanged_types(self): - types = (np.asarray, PandasIndexAdapter, LazilyOuterIndexedArray) + types = (np.asarray, PandasIndexAdapter, LazilyIndexedArray) for t in types: for data in [ np.arange(3), @@ -2340,19 +2340,19 @@ def test_NumpyIndexingAdapter(self): dims=("x", "y"), data=NumpyIndexingAdapter(NumpyIndexingAdapter(self.d)) ) - def test_LazilyOuterIndexedArray(self): - v = Variable(dims=("x", "y"), data=LazilyOuterIndexedArray(self.d)) + def test_LazilyIndexedArray(self): + v = Variable(dims=("x", "y"), data=LazilyIndexedArray(self.d)) self.check_orthogonal_indexing(v) self.check_vectorized_indexing(v) # doubly wrapping v = Variable( dims=("x", "y"), - data=LazilyOuterIndexedArray(LazilyOuterIndexedArray(self.d)), + data=LazilyIndexedArray(LazilyIndexedArray(self.d)), ) self.check_orthogonal_indexing(v) # hierarchical wrapping v = Variable( - dims=("x", "y"), data=LazilyOuterIndexedArray(NumpyIndexingAdapter(self.d)) + dims=("x", "y"), data=LazilyIndexedArray(NumpyIndexingAdapter(self.d)) ) self.check_orthogonal_indexing(v) @@ -2361,9 +2361,7 @@ def test_CopyOnWriteArray(self): self.check_orthogonal_indexing(v) self.check_vectorized_indexing(v) # doubly wrapping - v = Variable( - dims=("x", "y"), data=CopyOnWriteArray(LazilyOuterIndexedArray(self.d)) - ) + v = Variable(dims=("x", "y"), data=CopyOnWriteArray(LazilyIndexedArray(self.d))) self.check_orthogonal_indexing(v) self.check_vectorized_indexing(v)