diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
index e5492ec73a4..301fc53e0fa 100644
--- a/doc/api-hidden.rst
+++ b/doc/api-hidden.rst
@@ -809,3 +809,28 @@
backends.DummyFileManager.acquire
backends.DummyFileManager.acquire_context
backends.DummyFileManager.close
+
+ backends.common.BackendArray
+ backends.common.BackendEntrypoint
+ backends.common.BackendEntrypoint.guess_can_open
+ backends.common.BackendEntrypoint.open_dataset
+
+ core.indexing.IndexingSupport
+ core.indexing.explicit_indexing_adapter
+ core.indexing.BasicIndexer
+ core.indexing.OuterIndexer
+ core.indexing.VectorizedIndexer
+ core.indexing.LazilyIndexedArray
+ core.indexing.LazilyVectorizedIndexedArray
+
+ conventions.decode_cf_variables
+
+ coding.variables.UnsignedIntegerCoder
+ coding.variables.CFMaskCoder
+ coding.variables.CFScaleOffsetCoder
+
+ coding.strings.CharacterArrayCoder
+ coding.strings.EncodedStringCoder
+
+ coding.times.CFTimedeltaCoder
+ coding.times.CFDatetimeCoder
diff --git a/doc/internals.rst b/doc/internals.rst
index 60d32128c60..a461a12ec2e 100644
--- a/doc/internals.rst
+++ b/doc/internals.rst
@@ -231,3 +231,459 @@ re-open it directly with Zarr:
zgroup = zarr.open("rasm.zarr")
print(zgroup.tree())
dict(zgroup["Tair"].attrs)
+
+
+How to add a new backend
+------------------------
+
+Adding a new backend for read support to Xarray does not require you
+to integrate any code into Xarray; all you need to do is:
+
+- Create a class that inherits from Xarray :py:class:`~xarray.backends.common.BackendEntrypoint`
+  and implements the method ``open_dataset``, see :ref:`RST backend_entrypoint`
+
+- Declare this class as an external plugin in your ``setup.py``, see :ref:`RST backend_registration`
+
+If you also want to support lazy loading and dask see :ref:`RST lazy_loading`.
+
+Note that the new interface for backends is available from Xarray
+version >= 0.18 onwards.
+
+.. _RST backend_entrypoint:
+
+BackendEntrypoint subclassing
++++++++++++++++++++++++++++++
+
+Your ``BackendEntrypoint`` sub-class is the primary interface with Xarray, and
+it should implement the following attributes and methods:
+
+- the ``open_dataset`` method (mandatory)
+- the ``open_dataset_parameters`` attribute (optional)
+- the ``guess_can_open`` method (optional).
+
+This is what a ``BackendEntrypoint`` subclass should look like:
+
+.. code-block:: python
+
+ class MyBackendEntrypoint(BackendEntrypoint):
+ def open_dataset(
+ self,
+ filename_or_obj,
+ *,
+ drop_variables=None,
+ # other backend specific keyword arguments
+ ):
+ ...
+ return ds
+
+ open_dataset_parameters = ["filename_or_obj", "drop_variables"]
+
+ def guess_can_open(self, filename_or_obj):
+ try:
+ _, ext = os.path.splitext(filename_or_obj)
+ except TypeError:
+ return False
+ return ext in {...}
+
+``BackendEntrypoint`` subclass methods and attributes are detailed in the following.
+
+.. _RST open_dataset:
+
+open_dataset
+^^^^^^^^^^^^
+
+The backend ``open_dataset`` method shall implement reading from file and variable
+decoding, and it shall instantiate the output Xarray class :py:class:`~xarray.Dataset`.
+
+The following is an example of the high level processing steps:
+
+.. code-block:: python
+
+ def open_dataset(
+ self,
+ filename_or_obj,
+ *,
+ drop_variables=None,
+ decode_times=True,
+ decode_timedelta=True,
+ decode_coords=True,
+ my_backend_param=None,
+ ):
+ vars, attrs, coords = my_reader(
+ filename_or_obj,
+ drop_variables=drop_variables,
+ my_backend_param=my_backend_param,
+ )
+ vars, attrs, coords = my_decode_variables(
+ vars, attrs, decode_times, decode_timedelta, decode_coords
+ ) # see also conventions.decode_cf_variables
+
+ ds = xr.Dataset(vars, attrs=attrs)
+ ds = ds.set_coords(coords)
+ ds.set_close(store.close)
+
+ return ds
+
+
+The output :py:class:`~xarray.Dataset` shall implement the additional custom method
+``close``, used by Xarray to ensure the related files are eventually closed. This
+method shall be set by using :py:meth:`~xarray.Dataset.set_close`.
+
+
+The inputs of the ``open_dataset`` method are one argument
+(``filename_or_obj``) and one keyword argument (``drop_variables``):
+
+- ``filename_or_obj``: can be a string containing a path or an instance of
+  :py:class:`pathlib.Path`.
+- ``drop_variables``: can be ``None`` or an iterable containing the variable
+  names to be dropped when reading the data.
+
+If it makes sense for your backend, your ``open_dataset`` method
+should implement in its interface the following boolean keyword arguments, called
+**decoders**, which default to ``None``:
+
+- ``mask_and_scale``
+- ``decode_times``
+- ``decode_timedelta``
+- ``use_cftime``
+- ``concat_characters``
+- ``decode_coords``
+
+Note: all the supported decoders shall be declared explicitly
+in backend ``open_dataset`` signature.
+
+These keyword arguments are explicitly defined in Xarray
+:py:func:`~xarray.open_dataset` signature. Xarray will pass them to the
+backend only if the User explicitly sets a value different from ``None``.
+For more details on decoders see :ref:`RST decoders`.
+
+Your backend can also take as input a set of backend-specific keyword
+arguments. All these keyword arguments can be passed to
+:py:func:`~xarray.open_dataset` grouped either via the ``backend_kwargs``
+parameter or explicitly using the syntax ``**kwargs``.
+
+
+If you don't want to support the lazy loading, then the
+:py:class:`~xarray.Dataset` shall contain values as a :py:class:`numpy.ndarray`
+and your work is almost done.
+
+.. _RST open_dataset_parameters:
+
+open_dataset_parameters
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``open_dataset_parameters`` is the list of backend ``open_dataset`` parameters.
+It is not a mandatory parameter, and if the backend does not provide it
+explicitly, Xarray creates a list of them automatically by inspecting the
+backend signature.
+
+If ``open_dataset_parameters`` is not defined, but ``**kwargs`` and ``*args``
+are in the backend ``open_dataset`` signature, Xarray raises an error.
+On the other hand, if the backend provides the ``open_dataset_parameters``,
+then ``**kwargs`` and ``*args`` can be used in the signature.
+However, this practice is discouraged unless there is a good reason for using
+``**kwargs`` or ``*args``.
+
+.. _RST guess_can_open:
+
+guess_can_open
+^^^^^^^^^^^^^^
+
+``guess_can_open`` is used to identify the proper engine to open your data
+file automatically in case the engine is not specified explicitly. If you are
+not interested in supporting this feature, you can skip this step since
+:py:class:`~xarray.backends.common.BackendEntrypoint` already provides a
+default :py:meth:`~xarray.backends.common.BackendEntrypoint.guess_can_open`
+that always returns ``False``.
+
+Backend ``guess_can_open`` takes as input the ``filename_or_obj`` parameter of
+Xarray :py:meth:`~xarray.open_dataset`, and returns a boolean.
+
+.. _RST decoders:
+
+Decoders
+^^^^^^^^
+The decoders implement specific operations to transform data from on-disk
+representation to Xarray representation.
+
+A classic example is the “time” variable decoding operation. In NetCDF, the
+elements of the “time” variable are stored as integers, and the unit contains
+an origin (for example: "seconds since 1970-1-1"). In this case, Xarray
+transforms the pair integer-unit in a :py:class:`numpy.datetime64`.
+
+The standard coders implemented in Xarray are:
+
+- :py:class:`xarray.coding.strings.CharacterArrayCoder()`
+- :py:class:`xarray.coding.strings.EncodedStringCoder()`
+- :py:class:`xarray.coding.variables.UnsignedIntegerCoder()`
+- :py:class:`xarray.coding.variables.CFMaskCoder()`
+- :py:class:`xarray.coding.variables.CFScaleOffsetCoder()`
+- :py:class:`xarray.coding.times.CFTimedeltaCoder()`
+- :py:class:`xarray.coding.times.CFDatetimeCoder()`
+
+Xarray coders all have the same interface. They have two methods: ``decode``
+and ``encode``. The method ``decode`` takes a ``Variable`` in on-disk
+format and returns a ``Variable`` in Xarray format. Variable
+attributes that are no longer applicable after decoding are dropped and stored in the
+``Variable.encoding`` to make them available to the ``encode`` method, which
+performs the inverse transformation.
+
+The following is an example of how to use the ``decode`` method of a coder:
+
+.. ipython:: python
+
+ var = xr.Variable(
+ dims=("x",), data=np.arange(10.0), attrs={"scale_factor": 10, "add_offset": 2}
+ )
+ var
+
+ coder = xr.coding.variables.CFScaleOffsetCoder()
+ decoded_var = coder.decode(var)
+ decoded_var
+ decoded_var.encoding
+
+Some of the transformations can be common to more backends, so before
+implementing a new decoder, be sure Xarray does not already implement that one.
+
+The backends can reuse Xarray’s decoders, either instantiating the coders
+and using the method ``decode`` directly or using the higher-level function
+:py:func:`~xarray.conventions.decode_cf_variables` that groups Xarray decoders.
+
+In some cases, the transformation to apply strongly depends on the on-disk
+data format. Therefore, you may need to implement your own decoder.
+
+An example of such a case is when you have to deal with the time format of a
+grib file. grib format is very different from the NetCDF one: in grib, the
+time is stored in two attributes dataDate and dataTime as strings. Therefore,
+it is not possible to reuse the Xarray time decoder, and implementing a new
+one is mandatory.
+
+Decoders can be activated or deactivated using the boolean keywords of
+Xarray :py:meth:`~xarray.open_dataset` signature: ``mask_and_scale``,
+``decode_times``, ``decode_timedelta``, ``use_cftime``,
+``concat_characters``, ``decode_coords``.
+Such keywords are passed to the backend only if the User sets a value
+different from ``None``. Note that the backend does not necessarily have to
+implement all the decoders, but it shall declare in its ``open_dataset``
+interface only the boolean keywords related to the supported decoders.
+
+.. _RST backend_registration:
+
+How to register a backend
++++++++++++++++++++++++++++
+
+Define a new entrypoint in your ``setup.py`` (or ``setup.cfg``) with:
+
+- group: ``xarray.backends``
+- name: the name to be passed to :py:meth:`~xarray.open_dataset` as ``engine``
+- object reference: the reference of the class that you have implemented.
+
+You can declare the entrypoint in ``setup.py`` using the following syntax:
+
+.. code-block::
+
+ setuptools.setup(
+ entry_points={
+ "xarray.backends": [
+ "engine_name=your_package.your_module:YourBackendEntryClass"
+ ],
+ },
+ )
+
+in ``setup.cfg``:
+
+.. code-block:: cfg
+
+ [options.entry_points]
+ xarray.backends =
+ engine_name = your_package.your_module:YourBackendEntryClass
+
+
+See https://packaging.python.org/specifications/entry-points/#data-model
+for more information.
+
+If you are using `Poetry <https://python-poetry.org/>`__ for your build system, you can accomplish the same thing using "plugins". In this case you would need to add the following to your ``pyproject.toml`` file:
+
+.. code-block:: toml
+
+    [tool.poetry.plugins."xarray.backends"]
+ "engine_name" = "your_package.your_module:YourBackendEntryClass"
+
+See https://python-poetry.org/docs/pyproject/#plugins for more information on Poetry plugins.
+
+.. _RST lazy_loading:
+
+How to support Lazy Loading
++++++++++++++++++++++++++++
+If you want to make your backend effective with big datasets, then you should
+support lazy loading.
+Basically, you shall replace the :py:class:`numpy.ndarray` inside the
+variables with a custom class that supports lazy loading indexing.
+See the example below:
+
+.. code-block:: python
+
+ backend_array = MyBackendArray()
+ data = indexing.LazilyIndexedArray(backend_array)
+ var = xr.Variable(dims, data, attrs=attrs, encoding=encoding)
+
+Where:
+
+- :py:class:`~xarray.core.indexing.LazilyIndexedArray` is a class
+ provided by Xarray that manages the lazy loading.
+- ``MyBackendArray`` shall be implemented by the backend and shall inherit
+ from :py:class:`~xarray.backends.common.BackendArray`.
+
+BackendArray subclassing
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The BackendArray subclass shall implement the following method and attributes:
+
+- the ``__getitem__`` method that takes as input an index and returns a
+  `NumPy <https://numpy.org/>`__ array
+- the ``shape`` attribute
+- the ``dtype`` attribute.
+
+
+Xarray supports different types of
+`indexing <https://numpy.org/doc/stable/reference/arrays.indexing.html>`__, that can be
+grouped in three types of indexes
+:py:class:`~xarray.core.indexing.BasicIndexer`,
+:py:class:`~xarray.core.indexing.OuterIndexer` and
+:py:class:`~xarray.core.indexing.VectorizedIndexer`.
+This implies that the implementation of the method ``__getitem__`` can be tricky.
+In order to simplify this task, Xarray provides a helper function,
+:py:func:`~xarray.core.indexing.explicit_indexing_adapter`, that transforms
+all the input ``indexer`` types (`basic`, `outer`, `vectorized`) in a tuple
+which is interpreted correctly by your backend.
+
+This is an example ``BackendArray`` subclass implementation:
+
+.. code-block:: python
+
+ class MyBackendArray(BackendArray):
+ def __init__(
+ self,
+ shape,
+ dtype,
+ lock,
+ # other backend specific keyword arguments
+ ):
+ self.shape = shape
+            self.dtype = dtype
+            self.lock = lock
+
+ def __getitem__(
+ self, key: xarray.core.indexing.ExplicitIndexer
+ ) -> np.typing.ArrayLike:
+ return indexing.explicit_indexing_adapter(
+ key,
+ self.shape,
+ indexing.IndexingSupport.BASIC,
+ self._raw_indexing_method,
+ )
+
+ def _raw_indexing_method(self, key: tuple) -> np.typing.ArrayLike:
+ # thread safe method that access to data on disk
+ with self.lock:
+ ...
+ return item
+
+Note that ``BackendArray.__getitem__`` must be thread safe to support
+multi-thread processing.
+
+The :py:func:`~xarray.core.indexing.explicit_indexing_adapter` method takes as
+input the ``key``, the array ``shape`` and the following parameters:
+
+- ``indexing_support``: the type of index supported by ``raw_indexing_method``
+- ``raw_indexing_method``: a method that shall take in input a key in the form
+ of a tuple and return an indexed :py:class:`numpy.ndarray`.
+
+For more details see
+:py:class:`~xarray.core.indexing.IndexingSupport` and :ref:`RST indexing`.
+
+In order to support `Dask <https://dask.org/>`__ distributed and
+:py:mod:`multiprocessing`, ``BackendArray`` subclass should be serializable
+either with :ref:`io.pickle` or
+`cloudpickle <https://github.com/cloudpipe/cloudpickle>`__.
+That implies that all the reference to open files should be dropped. For
+opening files, we therefore suggest to use the helper class provided by Xarray
+:py:class:`~xarray.backends.CachingFileManager`.
+
+.. _RST indexing:
+
+Indexing Examples
+^^^^^^^^^^^^^^^^^
+**BASIC**
+
+In the ``BASIC`` indexing support, numbers and slices are supported.
+
+Example:
+
+.. ipython::
+ :verbatim:
+
+ In [1]: # () shall return the full array
+ ...: backend_array._raw_indexing_method(())
+ Out[1]: array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]])
+
+ In [2]: # shall support integers
+ ...: backend_array._raw_indexing_method(1, 1)
+ Out[2]: 5
+
+ In [3]: # shall support slices
+ ...: backend_array._raw_indexing_method(slice(0, 3), slice(2, 4))
+ Out[3]: array([[2, 3], [6, 7], [10, 11]])
+
+**OUTER**
+
+The ``OUTER`` indexing shall support numbers, slices and, in addition, it shall
+also support lists of integers. The outer indexing is equivalent to
+combining multiple input lists with ``itertools.product()``:
+
+.. ipython::
+ :verbatim:
+
+ In [1]: backend_array._raw_indexing_method([0, 1], [0, 1, 2])
+ Out[1]: array([[0, 1, 2], [4, 5, 6]])
+
+ # shall support integers
+ In [2]: backend_array._raw_indexing_method(1, 1)
+ Out[2]: 5
+
+
+**OUTER_1VECTOR**
+
+The ``OUTER_1VECTOR`` indexing shall support numbers, slices and at most one
+list. The behaviour with the list shall be the same as in ``OUTER`` indexing.
+
+If you support more complex indexing such as `explicit indexing` or
+`numpy indexing`, you can have a look at the implementation of the Zarr backend and the Scipy backend,
+currently available in the :py:mod:`~xarray.backends` module.
+
+.. _RST preferred_chunks:
+
+Backend preferred chunks
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The backend is not directly involved in `Dask <https://dask.org/>`__
+chunking, since it is internally managed by Xarray. However, the backend can
+define the preferred chunk size inside the variable’s encoding
+``var.encoding["preferred_chunks"]``. The ``preferred_chunks`` may be useful
+to improve performances with lazy loading. ``preferred_chunks`` shall be a
+dictionary specifying chunk size per dimension like
+``{"dim1": 1000, "dim2": 2000}`` or
+``{"dim1": [1000, 100], "dim2": [2000, 2000, 2000]}``.
+
+The ``preferred_chunks`` is used by Xarray to define the chunk size in some
+special cases:
+
+- if ``chunks`` along a dimension is ``None`` or not defined
+- if ``chunks`` is ``"auto"``.
+
+In the first case Xarray uses the chunks size specified in
+``preferred_chunks``.
+In the second case Xarray accommodates ideal chunk sizes, preserving if
+possible the "preferred_chunks". The ideal chunk size is computed using
+:py:func:`dask.core.normalize_chunks`, setting
+``previous_chunks = preferred_chunks``.
diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py
index 65c5bc2a02b..7ebbc246f55 100644
--- a/xarray/backends/cfgrib_.py
+++ b/xarray/backends/cfgrib_.py
@@ -62,7 +62,7 @@ def open_store_variable(self, name, var):
data = var.data
else:
wrapped_array = CfGribArrayWrapper(self, var.data)
- data = indexing.LazilyOuterIndexedArray(wrapped_array)
+ data = indexing.LazilyIndexedArray(wrapped_array)
encoding = self.ds.encoding.copy()
encoding["original_shape"] = var.data.shape
@@ -87,9 +87,9 @@ def get_encoding(self):
class CfgribfBackendEntrypoint(BackendEntrypoint):
- def guess_can_open(self, store_spec):
+ def guess_can_open(self, filename_or_obj):
try:
- _, ext = os.path.splitext(store_spec)
+ _, ext = os.path.splitext(filename_or_obj)
except TypeError:
return False
return ext in {".grib", ".grib2", ".grb", ".grb2"}
diff --git a/xarray/backends/common.py b/xarray/backends/common.py
index e2905d0866b..aa902602278 100644
--- a/xarray/backends/common.py
+++ b/xarray/backends/common.py
@@ -1,7 +1,7 @@
import logging
import time
import traceback
-from typing import Dict, Tuple, Type, Union
+from typing import Any, Dict, Tuple, Type, Union
import numpy as np
@@ -344,12 +344,41 @@ def encode(self, variables, attributes):
class BackendEntrypoint:
+ """
+ ``BackendEntrypoint`` is a class container and it is the main interface
+ for the backend plugins, see :ref:`RST backend_entrypoint`.
+ It shall implement:
+
+ - ``open_dataset`` method: it shall implement reading from file, variables
+ decoding and it returns an instance of :py:class:`~xarray.Dataset`.
+      It shall take as input at least the ``filename_or_obj`` argument and the
+ ``drop_variables`` keyword argument.
+ For more details see :ref:`RST open_dataset`.
+ - ``guess_can_open`` method: it shall return ``True`` if the backend is able to open
+ ``filename_or_obj``, ``False`` otherwise. The implementation of this
+ method is not mandatory.
+ """
+
open_dataset_parameters: Union[Tuple, None] = None
+ """list of ``open_dataset`` method parameters"""
+
+ def open_dataset(
+ self,
+ filename_or_obj: str,
+ drop_variables: Tuple[str] = None,
+ **kwargs: Any,
+ ):
+ """
+ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
+ """
- def open_dataset(self):
raise NotImplementedError
- def guess_can_open(self, store_spec):
+ def guess_can_open(self, filename_or_obj):
+ """
+        Backend guess_can_open method used by Xarray in :py:func:`~xarray.open_dataset`.
+ """
+
return False
diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py
index aa892c4f89c..532c63089bb 100644
--- a/xarray/backends/h5netcdf_.py
+++ b/xarray/backends/h5netcdf_.py
@@ -182,7 +182,7 @@ def open_store_variable(self, name, var):
import h5py
dimensions = var.dimensions
- data = indexing.LazilyOuterIndexedArray(H5NetCDFArrayWrapper(name, self))
+ data = indexing.LazilyIndexedArray(H5NetCDFArrayWrapper(name, self))
attrs = _read_attributes(var)
# netCDF4 specific encoding
@@ -329,14 +329,14 @@ def close(self, **kwargs):
class H5netcdfBackendEntrypoint(BackendEntrypoint):
- def guess_can_open(self, store_spec):
+ def guess_can_open(self, filename_or_obj):
try:
- return read_magic_number(store_spec).startswith(b"\211HDF\r\n\032\n")
+ return read_magic_number(filename_or_obj).startswith(b"\211HDF\r\n\032\n")
except TypeError:
pass
try:
- _, ext = os.path.splitext(store_spec)
+ _, ext = os.path.splitext(filename_or_obj)
except TypeError:
return False
diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py
index e3d87aaf83f..1c82f1975a7 100644
--- a/xarray/backends/netCDF4_.py
+++ b/xarray/backends/netCDF4_.py
@@ -388,7 +388,7 @@ def ds(self):
def open_store_variable(self, name, var):
dimensions = var.dimensions
- data = indexing.LazilyOuterIndexedArray(NetCDF4ArrayWrapper(name, self))
+ data = indexing.LazilyIndexedArray(NetCDF4ArrayWrapper(name, self))
attributes = {k: var.getncattr(k) for k in var.ncattrs()}
_ensure_fill_value_valid(data, attributes)
# netCDF4 specific encoding; save _FillValue for later
@@ -513,11 +513,11 @@ def close(self, **kwargs):
class NetCDF4BackendEntrypoint(BackendEntrypoint):
- def guess_can_open(self, store_spec):
- if isinstance(store_spec, str) and is_remote_uri(store_spec):
+ def guess_can_open(self, filename_or_obj):
+ if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
return True
try:
- _, ext = os.path.splitext(store_spec)
+ _, ext = os.path.splitext(filename_or_obj)
except TypeError:
return False
return ext in {".nc", ".nc4", ".cdf"}
diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py
index 80485fce459..3faa42b1b12 100644
--- a/xarray/backends/pseudonetcdf_.py
+++ b/xarray/backends/pseudonetcdf_.py
@@ -74,7 +74,7 @@ def ds(self):
return self._manager.acquire()
def open_store_variable(self, name, var):
- data = indexing.LazilyOuterIndexedArray(PncArrayWrapper(name, self))
+ data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self))
attrs = {k: getattr(var, k) for k in var.ncattrs()}
return Variable(var.dimensions, data, attrs)
diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py
index 7f8622ca66e..69d93299381 100644
--- a/xarray/backends/pydap_.py
+++ b/xarray/backends/pydap_.py
@@ -92,7 +92,7 @@ def open(cls, url, session=None):
return cls(ds)
def open_store_variable(self, var):
- data = indexing.LazilyOuterIndexedArray(PydapArrayWrapper(var))
+ data = indexing.LazilyIndexedArray(PydapArrayWrapper(var))
return Variable(var.dimensions, data, _fix_attributes(var.attributes))
def get_variables(self):
@@ -108,8 +108,8 @@ def get_dimensions(self):
class PydapBackendEntrypoint(BackendEntrypoint):
- def guess_can_open(self, store_spec):
- return isinstance(store_spec, str) and is_remote_uri(store_spec)
+ def guess_can_open(self, filename_or_obj):
+ return isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj)
def open_dataset(
self,
diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py
index 41c99efd076..dfc0efbd6da 100644
--- a/xarray/backends/pynio_.py
+++ b/xarray/backends/pynio_.py
@@ -74,7 +74,7 @@ def ds(self):
return self._manager.acquire()
def open_store_variable(self, name, var):
- data = indexing.LazilyOuterIndexedArray(NioArrayWrapper(name, self))
+ data = indexing.LazilyIndexedArray(NioArrayWrapper(name, self))
return Variable(var.dimensions, data, var.attributes)
def get_variables(self):
diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py
index d776b116ea8..51f0599e8e0 100644
--- a/xarray/backends/rasterio_.py
+++ b/xarray/backends/rasterio_.py
@@ -335,9 +335,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
else:
attrs[k] = v
- data = indexing.LazilyOuterIndexedArray(
- RasterioArrayWrapper(manager, lock, vrt_params)
- )
+ data = indexing.LazilyIndexedArray(RasterioArrayWrapper(manager, lock, vrt_params))
# this lets you write arrays loaded with rasterio
data = indexing.CopyOnWriteArray(data)
diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py
index ddc157ed8e4..49215843397 100644
--- a/xarray/backends/scipy_.py
+++ b/xarray/backends/scipy_.py
@@ -233,14 +233,14 @@ def close(self):
class ScipyBackendEntrypoint(BackendEntrypoint):
- def guess_can_open(self, store_spec):
+ def guess_can_open(self, filename_or_obj):
try:
- return read_magic_number(store_spec).startswith(b"CDF")
+ return read_magic_number(filename_or_obj).startswith(b"CDF")
except TypeError:
pass
try:
- _, ext = os.path.splitext(store_spec)
+ _, ext = os.path.splitext(filename_or_obj)
except TypeError:
return False
return ext in {".nc", ".nc4", ".cdf", ".gz"}
diff --git a/xarray/backends/store.py b/xarray/backends/store.py
index d57b3ab9df8..860a0254b64 100644
--- a/xarray/backends/store.py
+++ b/xarray/backends/store.py
@@ -4,8 +4,8 @@
class StoreBackendEntrypoint(BackendEntrypoint):
- def guess_can_open(self, store_spec):
- return isinstance(store_spec, AbstractDataStore)
+ def guess_can_open(self, filename_or_obj):
+ return isinstance(filename_or_obj, AbstractDataStore)
def open_dataset(
self,
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 04fdeac6450..ca5c2a51fa4 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -326,7 +326,7 @@ def __init__(
self._write_region = write_region
def open_store_variable(self, name, zarr_array):
- data = indexing.LazilyOuterIndexedArray(ZarrArrayWrapper(name, self))
+ data = indexing.LazilyIndexedArray(ZarrArrayWrapper(name, self))
dimensions, attributes = _get_zarr_dims_and_attrs(zarr_array, DIMENSION_KEY)
attributes = dict(attributes)
encoding = {
diff --git a/xarray/conventions.py b/xarray/conventions.py
index 93e765e5622..7b467d3ee2e 100644
--- a/xarray/conventions.py
+++ b/xarray/conventions.py
@@ -354,7 +354,7 @@ def decode_cf_variable(
data = BoolTypeArray(data)
if not is_duck_dask_array(data):
- data = indexing.LazilyOuterIndexedArray(data)
+ data = indexing.LazilyIndexedArray(data)
return Variable(dimensions, data, attributes, encoding=encoding)
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index dff6d75d5b7..0c180fdc9f7 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -513,7 +513,7 @@ def __getitem__(self, key):
return result
-class LazilyOuterIndexedArray(ExplicitlyIndexedNDArrayMixin):
+class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin):
"""Wrap an array to make basic and outer indexing lazy."""
__slots__ = ("array", "key")
@@ -619,10 +619,10 @@ def _updated_key(self, new_key):
return _combine_indexers(self.key, self.shape, new_key)
def __getitem__(self, indexer):
- # If the indexed array becomes a scalar, return LazilyOuterIndexedArray
+ # If the indexed array becomes a scalar, return LazilyIndexedArray
if all(isinstance(ind, integer_types) for ind in indexer.tuple):
key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple))
- return LazilyOuterIndexedArray(self.array, key)
+ return LazilyIndexedArray(self.array, key)
return type(self)(self.array, self._updated_key(indexer))
def transpose(self, order):
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 45553eb9b1e..5081f1dbda1 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -169,7 +169,7 @@ def _maybe_wrap_data(data):
Put pandas.Index and numpy.ndarray arguments in adapter objects to ensure
they can be indexed properly.
- NumpyArrayAdapter, PandasIndexAdapter and LazilyOuterIndexedArray should
+ NumpyArrayAdapter, PandasIndexAdapter and LazilyIndexedArray should
all pass through unmodified.
"""
if isinstance(data, pd.Index):
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index db47faa8d2b..9bc7a1b8566 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -187,7 +187,7 @@ def get_variables(self):
def lazy_inaccessible(k, v):
if k in self._indexvars:
return v
- data = indexing.LazilyOuterIndexedArray(InaccessibleArray(v.values))
+ data = indexing.LazilyIndexedArray(InaccessibleArray(v.values))
return Variable(v.dims, data, v.attrs)
return {k: lazy_inaccessible(k, v) for k, v in self._variables.items()}
diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py
index 4ef7536e1f2..10641ff54e9 100644
--- a/xarray/tests/test_indexing.py
+++ b/xarray/tests/test_indexing.py
@@ -224,7 +224,7 @@ def test_lazily_indexed_array(self):
original = np.random.rand(10, 20, 30)
x = indexing.NumpyIndexingAdapter(original)
v = Variable(["i", "j", "k"], original)
- lazy = indexing.LazilyOuterIndexedArray(x)
+ lazy = indexing.LazilyIndexedArray(x)
v_lazy = Variable(["i", "j", "k"], lazy)
arr = ReturnItem()
# test orthogonally applied indexers
@@ -244,9 +244,7 @@ def test_lazily_indexed_array(self):
]:
assert expected.shape == actual.shape
assert_array_equal(expected, actual)
- assert isinstance(
- actual._data, indexing.LazilyOuterIndexedArray
- )
+ assert isinstance(actual._data, indexing.LazilyIndexedArray)
# make sure actual.key is appropriate type
if all(
@@ -282,18 +280,18 @@ def test_lazily_indexed_array(self):
actual._data,
(
indexing.LazilyVectorizedIndexedArray,
- indexing.LazilyOuterIndexedArray,
+ indexing.LazilyIndexedArray,
),
)
- assert isinstance(actual._data, indexing.LazilyOuterIndexedArray)
+ assert isinstance(actual._data, indexing.LazilyIndexedArray)
assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter)
def test_vectorized_lazily_indexed_array(self):
original = np.random.rand(10, 20, 30)
x = indexing.NumpyIndexingAdapter(original)
v_eager = Variable(["i", "j", "k"], x)
- lazy = indexing.LazilyOuterIndexedArray(x)
+ lazy = indexing.LazilyIndexedArray(x)
v_lazy = Variable(["i", "j", "k"], lazy)
arr = ReturnItem()
@@ -306,7 +304,7 @@ def check_indexing(v_eager, v_lazy, indexers):
actual._data,
(
indexing.LazilyVectorizedIndexedArray,
- indexing.LazilyOuterIndexedArray,
+ indexing.LazilyIndexedArray,
),
)
assert_array_equal(expected, actual)
@@ -364,19 +362,19 @@ def test_index_scalar(self):
class TestMemoryCachedArray:
def test_wrapper(self):
- original = indexing.LazilyOuterIndexedArray(np.arange(10))
+ original = indexing.LazilyIndexedArray(np.arange(10))
wrapped = indexing.MemoryCachedArray(original)
assert_array_equal(wrapped, np.arange(10))
assert isinstance(wrapped.array, indexing.NumpyIndexingAdapter)
def test_sub_array(self):
- original = indexing.LazilyOuterIndexedArray(np.arange(10))
+ original = indexing.LazilyIndexedArray(np.arange(10))
wrapped = indexing.MemoryCachedArray(original)
child = wrapped[B[:5]]
assert isinstance(child, indexing.MemoryCachedArray)
assert_array_equal(child, np.arange(5))
assert isinstance(child.array, indexing.NumpyIndexingAdapter)
- assert isinstance(wrapped.array, indexing.LazilyOuterIndexedArray)
+ assert isinstance(wrapped.array, indexing.LazilyIndexedArray)
def test_setitem(self):
original = np.arange(10)
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index e1ae3e1f258..90dfaa9c121 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -15,7 +15,7 @@
BasicIndexer,
CopyOnWriteArray,
DaskIndexingAdapter,
- LazilyOuterIndexedArray,
+ LazilyIndexedArray,
MemoryCachedArray,
NumpyIndexingAdapter,
OuterIndexer,
@@ -1095,9 +1095,9 @@ def test_repr(self):
assert expected == repr(v)
def test_repr_lazy_data(self):
- v = Variable("x", LazilyOuterIndexedArray(np.arange(2e5)))
+ v = Variable("x", LazilyIndexedArray(np.arange(2e5)))
assert "200000 values with dtype" in repr(v)
- assert isinstance(v._data, LazilyOuterIndexedArray)
+ assert isinstance(v._data, LazilyIndexedArray)
def test_detect_indexer_type(self):
""" Tests indexer type was correctly detected. """
@@ -2169,7 +2169,7 @@ def test_coarsen_2d(self):
class TestAsCompatibleData:
def test_unchanged_types(self):
- types = (np.asarray, PandasIndexAdapter, LazilyOuterIndexedArray)
+ types = (np.asarray, PandasIndexAdapter, LazilyIndexedArray)
for t in types:
for data in [
np.arange(3),
@@ -2340,19 +2340,19 @@ def test_NumpyIndexingAdapter(self):
dims=("x", "y"), data=NumpyIndexingAdapter(NumpyIndexingAdapter(self.d))
)
- def test_LazilyOuterIndexedArray(self):
- v = Variable(dims=("x", "y"), data=LazilyOuterIndexedArray(self.d))
+ def test_LazilyIndexedArray(self):
+ v = Variable(dims=("x", "y"), data=LazilyIndexedArray(self.d))
self.check_orthogonal_indexing(v)
self.check_vectorized_indexing(v)
# doubly wrapping
v = Variable(
dims=("x", "y"),
- data=LazilyOuterIndexedArray(LazilyOuterIndexedArray(self.d)),
+ data=LazilyIndexedArray(LazilyIndexedArray(self.d)),
)
self.check_orthogonal_indexing(v)
# hierarchical wrapping
v = Variable(
- dims=("x", "y"), data=LazilyOuterIndexedArray(NumpyIndexingAdapter(self.d))
+ dims=("x", "y"), data=LazilyIndexedArray(NumpyIndexingAdapter(self.d))
)
self.check_orthogonal_indexing(v)
@@ -2361,9 +2361,7 @@ def test_CopyOnWriteArray(self):
self.check_orthogonal_indexing(v)
self.check_vectorized_indexing(v)
# doubly wrapping
- v = Variable(
- dims=("x", "y"), data=CopyOnWriteArray(LazilyOuterIndexedArray(self.d))
- )
+ v = Variable(dims=("x", "y"), data=CopyOnWriteArray(LazilyIndexedArray(self.d)))
self.check_orthogonal_indexing(v)
self.check_vectorized_indexing(v)