diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 34d4f9a23ad..a30affcbe93 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Hashable from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, Callable, Union +from typing import TYPE_CHECKING, Callable, Union, overload import numpy as np import pandas as pd @@ -22,13 +22,19 @@ ) from xarray.core import indexing from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like -from xarray.core.duck_array_ops import asarray +from xarray.core.duck_array_ops import asarray, ravel from xarray.core.formatting import first_n_items, format_timestamp, last_item from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.utils import emit_user_level_warning from xarray.core.variable import Variable -from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray._typing import ( + _chunkedarrayfunction_or_api, + chunkedduckarray, + duckarray, +) +from xarray.namedarray.parallelcompat import get_chunked_array_type + +# from xarray.namedarray.pycompat import is_chunked_array from xarray.namedarray.utils import is_duck_dask_array try: @@ -37,7 +43,7 @@ cftime = None if TYPE_CHECKING: - from xarray.core.types import CFCalendar, T_DuckArray + from xarray.core.types import CFCalendar T_Name = Union[Hashable, None] @@ -315,7 +321,7 @@ def decode_cf_datetime( cftime.num2date """ num_dates = np.asarray(num_dates) - flat_num_dates = num_dates.ravel() + flat_num_dates = ravel(num_dates) if calendar is None: calendar = "standard" @@ -369,7 +375,7 @@ def decode_cf_timedelta(num_timedeltas, units: str) -> np.ndarray: """ num_timedeltas = np.asarray(num_timedeltas) units = _netcdf_to_numpy_timeunit(units) - result = to_timedelta_unboxed(num_timedeltas.ravel(), unit=units) + result = to_timedelta_unboxed(ravel(num_timedeltas), unit=units) return result.reshape(num_timedeltas.shape) @@ -428,7 +434,7 @@ def infer_datetime_units(dates) -> str: 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = np.asarray(dates).ravel() + dates = ravel(np.asarray(dates)) if np.asarray(dates).dtype == "datetime64[ns]": dates = to_datetime_unboxed(dates) dates = dates[pd.notnull(dates)] @@ -456,7 +462,7 @@ def infer_timedelta_units(deltas) -> str: {'days', 'hours', 'minutes' 'seconds'} (the first one that can evenly divide all unique time deltas in `deltas`) """ - deltas = to_timedelta_unboxed(np.asarray(deltas).ravel()) + deltas = to_timedelta_unboxed(ravel(np.asarray(deltas))) unique_timedeltas = np.unique(deltas[pd.notnull(deltas)]) return _infer_time_units_from_diff(unique_timedeltas) @@ -643,7 +649,7 @@ def encode_datetime(d): except TypeError: return np.nan if d is None else cftime.date2num(d, units, calendar) - return np.array([encode_datetime(d) for d in dates.ravel()]).reshape(dates.shape) + return np.array([encode_datetime(d) for d in ravel(dates)]).reshape(dates.shape) def cast_to_int_if_safe(num) -> np.ndarray: @@ -700,12 +706,26 @@ def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray: return cast_num +@overload +def encode_cf_datetime( + dates: chunkedduckarray, + units: str | None = None, + calendar: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray, str, str]: ... +@overload +def encode_cf_datetime( + dates: duckarray, + units: str | None = None, + calendar: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: T_DuckArray, # type: ignore + dates: duckarray | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_DuckArray, str, str]: +) -> tuple[duckarray | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. @@ -716,19 +736,19 @@ def encode_cf_datetime( cftime.date2num """ dates = asarray(dates) - if is_chunked_array(dates): + if isinstance(dates, _chunkedarrayfunction_or_api): return _lazily_encode_cf_datetime(dates, units, calendar, dtype) else: return _eagerly_encode_cf_datetime(dates, units, calendar, dtype) def _eagerly_encode_cf_datetime( - dates: T_DuckArray, # type: ignore + dates: duckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[T_DuckArray, str, str]: +) -> tuple[duckarray, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -753,7 +773,7 @@ def _eagerly_encode_cf_datetime( # Wrap the dates in a DatetimeIndex to do the subtraction to ensure # an OverflowError is raised if the ref_date is too far away from # dates to be encoded (GH 2272). - dates_as_index = pd.DatetimeIndex(dates.ravel()) + dates_as_index = pd.DatetimeIndex(ravel(dates)) time_deltas = dates_as_index - ref_date # retrieve needed units to faithfully encode to int64 @@ -806,11 +826,11 @@ def _eagerly_encode_cf_datetime( def _encode_cf_datetime_within_map_blocks( - dates: T_DuckArray, # type: ignore + dates: duckarray, units: str, calendar: str, dtype: np.dtype, -) -> T_DuckArray: +) -> duckarray: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) @@ -818,11 +838,11 @@ def _encode_cf_datetime_within_map_blocks( def _lazily_encode_cf_datetime( - dates: T_ChunkedArray, + dates: chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_ChunkedArray, str, str]: +) -> tuple[chunkedduckarray, str, str]: if calendar is None: # This will only trigger minor compute if dates is an object dtype array. calendar = infer_calendar_name(dates) @@ -855,31 +875,43 @@ def _lazily_encode_cf_datetime( return num, units, calendar +@overload def encode_cf_timedelta( - timedeltas: T_DuckArray, # type: ignore + timedeltas: chunkedduckarray, units: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_DuckArray, str]: +) -> tuple[chunkedduckarray, str]: ... +@overload +def encode_cf_timedelta( + timedeltas: duckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[duckarray, str]: ... +def encode_cf_timedelta( + timedeltas: chunkedduckarray | duckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray | duckarray, str]: timedeltas = asarray(timedeltas) - if is_chunked_array(timedeltas): + if isinstance(timedeltas, _chunkedarrayfunction_or_api): return _lazily_encode_cf_timedelta(timedeltas, units, dtype) else: return _eagerly_encode_cf_timedelta(timedeltas, units, dtype) def _eagerly_encode_cf_timedelta( - timedeltas: T_DuckArray, # type: ignore + timedeltas: duckarray, units: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[T_DuckArray, str]: +) -> tuple[duckarray, str]: data_units = infer_timedelta_units(timedeltas) if units is None: units = data_units time_delta = _time_units_to_timedelta64(units) - time_deltas = pd.TimedeltaIndex(timedeltas.ravel()) + time_deltas = pd.TimedeltaIndex(ravel(timedeltas)) # retrieve needed units to faithfully encode to int64 needed_units = data_units @@ -920,10 +952,10 @@ def _eagerly_encode_cf_timedelta( def _encode_cf_timedelta_within_map_blocks( - timedeltas: T_DuckArray, # type:ignore + timedeltas: duckarray, units: str, dtype: np.dtype, -) -> T_DuckArray: +) -> duckarray: num, _ = _eagerly_encode_cf_timedelta( timedeltas, units, dtype, allow_units_modification=False ) @@ -931,8 +963,10 @@ def _encode_cf_timedelta_within_map_blocks( def _lazily_encode_cf_timedelta( - timedeltas: T_ChunkedArray, units: str | None = None, dtype: np.dtype | None = None -) -> tuple[T_ChunkedArray, str]: + timedeltas: chunkedduckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray, str]: if units is None and dtype is None: units = "nanoseconds" dtype = np.dtype("int64") diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..3e1104dc202 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -179,7 +179,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) - return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] + return chunkmanager.map_blocks(func, array, dtype=dtype) else: return _ElementwiseFunctionArray(array, func, dtype) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index f418d3821c2..a5d5ac09405 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -25,6 +25,7 @@ from xarray.core.types import Dims, T_DataArray from xarray.core.utils import is_dict_like, is_duck_dask_array, is_scalar, parse_dims from xarray.core.variable import Variable +from xarray.namedarray._typing import chunkedduckarray from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array from xarray.util.deprecation_helpers import deprecate_dims @@ -795,6 +796,7 @@ def apply_variable_ufunc( ) def func(*arrays): + res: chunkedduckarray | tuple[chunkedduckarray, ...] res = chunkmanager.apply_gufunc( numpy_func, signature.to_gufunc_string(exclude_dims), diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 50cfc7b0c29..7e0d0151d95 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -163,6 +163,7 @@ T_Xarray, ) from xarray.core.weighted import DatasetWeighted + from xarray.namedarray._typing import duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -860,7 +861,7 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute( + evaluated_data: tuple[duckarray[Any, Any], ...] = chunkmanager.compute( *lazy_data.values(), **kwargs ) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 06e7efdbb48..55d3c41acec 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1325,7 +1325,7 @@ def _arrayize_vectorized_indexer( def _chunked_array_with_chunks_hint( - array, chunks, chunkmanager: ChunkManagerEntrypoint[Any] + array, chunks, chunkmanager: ChunkManagerEntrypoint ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f0685882595..594c4287d4d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2526,7 +2526,7 @@ def chunk( # type: ignore[override] name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b715973814f..751fc92837a 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -43,11 +43,37 @@ class Default(Enum): _T = TypeVar("_T") _T_co = TypeVar("_T_co", covariant=True) +_generic = Any +# _generic = np.generic + + +class _DType2(Protocol[_T_co]): + def __eq__(self, other: _DType2[_generic], /) -> bool: + """ + Computes the truth value of ``self == other`` in order to test for data type object equality. + + Parameters + ---------- + self: dtype + data type instance. May be any supported data type. + other: dtype + other data type instance. May be any supported data type. + + Returns + ------- + out: bool + a boolean indicating whether the data type objects are equal. + """ + ... + + _dtype = np.dtype -_DType = TypeVar("_DType", bound=np.dtype[Any]) -_DType_co = TypeVar("_DType_co", covariant=True, bound=np.dtype[Any]) +_DType = TypeVar("_DType", bound=_dtype[Any]) +_DType_co = TypeVar("_DType_co", covariant=True, bound=_dtype[Any]) # A subset of `npt.DTypeLike` that can be parametrized w.r.t. `np.generic` +# _ScalarType = TypeVar("_ScalarType", bound=_generic) +# _ScalarType_co = TypeVar("_ScalarType_co", bound=_generic, covariant=True) _ScalarType = TypeVar("_ScalarType", bound=np.generic) _ScalarType_co = TypeVar("_ScalarType_co", bound=np.generic, covariant=True) @@ -60,9 +86,9 @@ def dtype(self) -> _DType_co: ... _DTypeLike = Union[ - np.dtype[_ScalarType], + _dtype[_ScalarType], type[_ScalarType], - _SupportsDType[np.dtype[_ScalarType]], + _SupportsDType[_dtype[_ScalarType]], ] # For unknown shapes Dask uses np.nan, array_api uses None: @@ -76,8 +102,14 @@ def dtype(self) -> _DType_co: ... _Axes = tuple[_Axis, ...] _AxisLike = Union[_Axis, _Axes] -_Chunks = tuple[_Shape, ...] -_NormalizedChunks = tuple[tuple[int, ...], ...] +_Chunk = tuple[int, ...] +_Chunks = tuple[_Chunk, ...] +_NormalizedChunks = tuple[tuple[int, ...], ...] # TODO: Same as Chunks. +_ChunksLike = Union[ + int, Literal["auto"], None, _Chunk, _Chunks +] # TODO: Literal["auto"] +_ChunksType = TypeVar("_ChunksType", bound=_Chunks) + # FYI in some cases we don't allow `None`, which this doesn't take account of. T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]] # We allow the tuple form of this (though arguably we could transition to named dims only) @@ -125,9 +157,7 @@ def dtype(self) -> _DType_co: ... @runtime_checkable -class _arrayfunction( - _array[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] -): +class _arrayfunction(_array[_ShapeType, _DType_co], Protocol[_ShapeType, _DType_co]): """ Duck array supporting NEP 18. @@ -153,14 +183,14 @@ def __getitem__( ) -> _arrayfunction[Any, _DType_co] | Any: ... @overload - def __array__(self, dtype: None = ..., /) -> np.ndarray[Any, _DType_co]: ... + def __array__(self, dtype: None = ..., /) -> np.ndarray[_ShapeType, _DType_co]: ... @overload - def __array__(self, dtype: _DType, /) -> np.ndarray[Any, _DType]: ... + def __array__(self, dtype: _DType, /) -> np.ndarray[_ShapeType, _DType]: ... def __array__( self, dtype: _DType | None = ..., / - ) -> np.ndarray[Any, _DType] | np.ndarray[Any, _DType_co]: ... + ) -> np.ndarray[_ShapeType, _DType] | np.ndarray[_ShapeType, _DType_co]: ... # TODO: Should return the same subclass but with a new dtype generic. # https://github.com/python/typing/issues/548 @@ -183,10 +213,10 @@ def __array_function__( ) -> Any: ... @property - def imag(self) -> _arrayfunction[_ShapeType_co, Any]: ... + def imag(self) -> _arrayfunction[_ShapeType, Any]: ... @property - def real(self) -> _arrayfunction[_ShapeType_co, Any]: ... + def real(self) -> _arrayfunction[_ShapeType, Any]: ... @runtime_checkable @@ -216,7 +246,7 @@ def __array_namespace__(self) -> ModuleType: ... ] # Corresponds to np.typing.NDArray: -DuckArray = _arrayfunction[Any, np.dtype[_ScalarType_co]] +DuckArray = _arrayfunction[Any, _dtype[_ScalarType_co]] @runtime_checkable @@ -235,7 +265,7 @@ def chunks(self) -> _Chunks: ... @runtime_checkable class _chunkedarrayfunction( - _arrayfunction[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] + _arrayfunction[_ShapeType, _DType_co], Protocol[_ShapeType, _DType_co] ): """ Chunked duck array supporting NEP 18. @@ -246,6 +276,11 @@ class _chunkedarrayfunction( @property def chunks(self) -> _Chunks: ... + def rechunk( + self, + chunks: _ChunksLike, + ) -> _chunkedarrayfunction[_ShapeType, _DType_co]: ... + @runtime_checkable class _chunkedarrayapi( @@ -260,6 +295,11 @@ class _chunkedarrayapi( @property def chunks(self) -> _Chunks: ... + def rechunk( + self, + chunks: _ChunksLike, + ) -> _chunkedarrayapi[_ShapeType_co, _DType_co]: ... + # NamedArray can most likely use both __array_function__ and __array_namespace__: _chunkedarrayfunction_or_api = (_chunkedarrayfunction, _chunkedarrayapi) @@ -279,12 +319,12 @@ class _sparsearray( Corresponds to np.ndarray. """ - def todense(self) -> np.ndarray[Any, _DType_co]: ... + def todense(self) -> np.ndarray[Any, np.dtype[np.generic]]: ... @runtime_checkable class _sparsearrayfunction( - _arrayfunction[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] + _arrayfunction[_ShapeType, _DType_co], Protocol[_ShapeType, _DType_co] ): """ Sparse duck array supporting NEP 18. @@ -292,7 +332,7 @@ class _sparsearrayfunction( Corresponds to np.ndarray. """ - def todense(self) -> np.ndarray[Any, _DType_co]: ... + def todense(self) -> np.ndarray[Any, np.dtype[np.generic]]: ... @runtime_checkable @@ -305,7 +345,7 @@ class _sparsearrayapi( Corresponds to np.ndarray. """ - def todense(self) -> np.ndarray[Any, _DType_co]: ... + def todense(self) -> np.ndarray[Any, np.dtype[np.generic]]: ... # NamedArray can most likely use both __array_function__ and __array_namespace__: @@ -317,3 +357,20 @@ def todense(self) -> np.ndarray[Any, _DType_co]: ... ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] + + +# def test(arr: duckarray[_ShapeType, _DType]) -> duckarray[_ShapeType, _DType]: +# return np.round(arr) + + +# test(np.array([], dtype=np.int64)) + + +# def test2(arr: _arrayfunction[Any, _DType]) -> _arrayfunction[Any, _DType]: +# return np.round(arr) +# # return np.asarray(arr) +# # return arr.__array__() +# # return arr + + +# test2(np.array([], dtype=np.int64)) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index fe47bf50533..0c5fc1073e0 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -749,7 +749,7 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, chunks: int | Literal["auto"] | Mapping[Any, None | int | tuple[int, ...]] = {}, - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: @@ -822,6 +822,7 @@ def chunk( chunkmanager = guess_chunkmanager(chunked_array_type) data_old = self._data + data_chunked: _chunkedarray[Any, _DType_co] if chunkmanager.is_chunked_array(data_old): data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] else: @@ -990,7 +991,7 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: Change backend from sparse to np.array. """ if isinstance(self._data, _sparsearrayfunction_or_api): - data_dense: np.ndarray[Any, _DType_co] = self._data.todense() + data_dense: np.ndarray[Any, Any] = self._data.todense() return self._new(data=data_dense) else: raise TypeError("self.data is not a sparse array") @@ -1152,7 +1153,7 @@ def expand_dims( return expand_dims(self, dim=dim) -_NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] +_NamedArray = NamedArray[Any, _dtype[_ScalarType_co]] def _raise_if_any_duplicate_dimensions( diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 14744d2de6b..cfd788793e1 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -1,25 +1,29 @@ from __future__ import annotations from collections.abc import Iterable, Sequence +from types import ModuleType from typing import TYPE_CHECKING, Any, Callable import numpy as np from packaging.version import Version from xarray.core.indexing import ImplicitToExplicitIndexingAdapter -from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray +from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: from xarray.namedarray._typing import ( - T_Chunks, - _DType_co, - _NormalizedChunks, + _Chunks, + _ChunksLike, + _DType, + _dtype, + _Shape, + chunkedduckarray, duckarray, ) try: - from dask.array import Array as DaskArray + from dask.array.core import Array as DaskArray except ImportError: DaskArray = np.ndarray[Any, Any] # type: ignore[assignment, misc] @@ -27,83 +31,90 @@ dask_available = module_available("dask") -class DaskManager(ChunkManagerEntrypoint["DaskArray"]): # type: ignore[type-var] - array_cls: type[DaskArray] +class DaskManager(ChunkManagerEntrypoint): available: bool = dask_available def __init__(self) -> None: # TODO can we replace this with a class attribute instead? - from dask.array import Array + from dask.array.core import Array - self.array_cls = Array + # TODO: error: Incompatible types in assignment (expression has type "type[Array]", variable has type "type[_chunkedarrayfunction[Any, Any]] | type[_chunkedarrayapi[Any, Any]]") [assignment] + self.array_cls = Array # type: ignore[assignment] def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) - def chunks(self, data: Any) -> _NormalizedChunks: - return data.chunks # type: ignore[no-any-return] + def chunks(self, data: chunkedduckarray[Any, _dtype[Any]]) -> _Chunks: + return data.chunks def normalize_chunks( self, - chunks: T_Chunks | _NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, - dtype: _DType_co | None = None, - previous_chunks: _NormalizedChunks | None = None, - ) -> Any: + dtype: _dtype[Any] | None = None, + previous_chunks: _Chunks | None = None, + ) -> _Chunks: """Called by open_dataset""" from dask.array.core import normalize_chunks - return normalize_chunks( + out: _Chunks + out = normalize_chunks( chunks, shape=shape, limit=limit, dtype=dtype, previous_chunks=previous_chunks, ) # type: ignore[no-untyped-call] + return out def from_array( - self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any - ) -> DaskArray | Any: - import dask.array as da + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs: Any + ) -> chunkedduckarray[Any, _DType]: + from dask.array.core import from_array if isinstance(data, ImplicitToExplicitIndexingAdapter): # lazily loaded backend array classes should use NumPy array operations. kwargs["meta"] = np.ndarray - return da.from_array( + out: chunkedduckarray[Any, _DType] + out = from_array( data, chunks, **kwargs, ) # type: ignore[no-untyped-call] + return out def compute( - self, *data: Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[duckarray[Any, _DType], ...]: from dask.array import compute - return compute(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] + out: tuple[duckarray[Any, _DType], ...] + out = compute(*data, **kwargs) # type: ignore[no-untyped-call] + return out @property - def array_api(self) -> Any: + def array_api(self) -> ModuleType: from dask import array as da return da - def reduction( # type: ignore[override] + def reduction( self, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, keepdims: bool = False, - ) -> DaskArray | Any: - from dask.array import reduction + ) -> chunkedduckarray[Any, _DType]: + from dask.array.reductions import reduction - return reduction( + out: chunkedduckarray[Any, _DType] + out = reduction( arr, chunk=func, combine=combine_func, @@ -112,20 +123,22 @@ def reduction( # type: ignore[override] dtype=dtype, keepdims=keepdims, ) # type: ignore[no-untyped-call] + return out - def scan( # type: ignore[override] + def scan( self, func: Callable[..., Any], binop: Callable[..., Any], ident: float, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], axis: int | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, **kwargs: Any, - ) -> DaskArray | Any: + ) -> chunkedduckarray[Any, _DType]: from dask.array.reductions import cumreduction - return cumreduction( + out: chunkedduckarray[Any, _DType] + out = cumreduction( func, binop, ident, @@ -134,6 +147,7 @@ def scan( # type: ignore[override] dtype=dtype, **kwargs, ) # type: ignore[no-untyped-call] + return out def apply_gufunc( self, @@ -141,18 +155,19 @@ def apply_gufunc( signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, - axis: int | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DType_co] | None = None, - output_sizes: dict[str, int] | None = None, + output_dtypes: Sequence[_DType] | None = None, vectorize: bool | None = None, + axis: int | None = None, + output_sizes: dict[str, int] | None = None, allow_rechunk: bool = False, - meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + meta: tuple[np.ndarray[Any, np.dtype[np.generic]], ...] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...]: from dask.array.gufunc import apply_gufunc - return apply_gufunc( + out: chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...] + out = apply_gufunc( func, signature, *args, @@ -167,18 +182,20 @@ def apply_gufunc( **kwargs, ) # type: ignore[no-untyped-call] + return out + def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: _DType_co | None = None, - chunks: tuple[int, ...] | None = None, + dtype: _DType | None = None, + chunks: _Chunks | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: import dask - from dask.array import map_blocks + from dask.array.core import map_blocks if drop_axis is None and Version(dask.__version__) < Version("2022.9.1"): # See https://github.com/pydata/xarray/pull/7019#discussion_r1196729489 @@ -186,7 +203,8 @@ def map_blocks( drop_axis = [] # pass through name, meta, token as kwargs - return map_blocks( + out: chunkedduckarray[Any, _DType] + out = map_blocks( func, *args, dtype=dtype, @@ -195,26 +213,27 @@ def map_blocks( new_axis=new_axis, **kwargs, ) # type: ignore[no-untyped-call] + return out def blockwise( self, func: Callable[..., Any], out_ind: Iterable[Any], - *args: Any, - # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types - name: str | None = None, - token: Any | None = None, - dtype: _DType_co | None = None, + *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, + name: str | None = None, + token: Any | None = None, + dtype: _DType | None = None, concatenate: bool | None = None, - meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + meta: tuple[np.ndarray[Any, np.dtype[np.generic]], ...] | None = None, **kwargs: Any, - ) -> DaskArray | Any: - from dask.array import blockwise + ) -> chunkedduckarray[Any, _DType]: + from dask.array.blockwise import blockwise - return blockwise( + out: chunkedduckarray[Any, _DType] + out = blockwise( func, out_ind, *args, @@ -228,15 +247,18 @@ def blockwise( meta=meta, **kwargs, ) # type: ignore[no-untyped-call] + return out def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: + ) -> tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]]: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) # type: ignore[no-any-return, no-untyped-call] + out: tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]] + out = unify_chunks(*args, **kwargs) # type: ignore[no-untyped-call] + return out def store( self, @@ -244,7 +266,7 @@ def store( targets: Any, **kwargs: Any, ) -> Any: - from dask.array import store + from dask.array.core import store return store( sources=sources, diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index dd555fe200a..252ea5778ba 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -11,7 +11,8 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Sequence from importlib.metadata import EntryPoint, entry_points -from typing import TYPE_CHECKING, Any, Callable, Generic, Protocol, TypeVar +from types import ModuleType +from typing import TYPE_CHECKING, Any, Callable import numpy as np @@ -20,34 +21,18 @@ if TYPE_CHECKING: from xarray.namedarray._typing import ( + _chunkedarrayfunction_or_api, _Chunks, + _ChunksLike, _DType, - _DType_co, - _NormalizedChunks, - _ShapeType, + _Shape, + chunkedduckarray, duckarray, ) -class ChunkedArrayMixinProtocol(Protocol): - def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... - - @property - def dtype(self) -> np.dtype[Any]: ... - - @property - def chunks(self) -> _NormalizedChunks: ... - - def compute( - self, *data: Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: ... - - -T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) - - @functools.lru_cache(maxsize=1) -def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: +def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: """ Return a dictionary of available chunk managers and their ChunkManagerEntrypoint subclass objects. @@ -71,7 +56,7 @@ def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: def load_chunkmanagers( entrypoints: Sequence[EntryPoint], -) -> dict[str, ChunkManagerEntrypoint[Any]]: +) -> dict[str, ChunkManagerEntrypoint]: """Load entrypoints and instantiate chunkmanagers only once.""" loaded_entrypoints = {} @@ -93,8 +78,8 @@ def load_chunkmanagers( def guess_chunkmanager( - manager: str | ChunkManagerEntrypoint[Any] | None, -) -> ChunkManagerEntrypoint[Any]: + manager: str | ChunkManagerEntrypoint | None, +) -> ChunkManagerEntrypoint: """ Get namespace of chunk-handling methods, guessing from what's available. @@ -128,7 +113,7 @@ def guess_chunkmanager( ) -def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: +def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint: """ Detects which parallel backend should be used for given set of arrays. @@ -171,7 +156,7 @@ def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: return selected[0] -class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): +class ChunkManagerEntrypoint(ABC): """ Interface between a particular parallel computing framework and xarray. @@ -190,7 +175,7 @@ class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): This attribute is used for array instance type checking at runtime. """ - array_cls: type[T_ChunkedArray] + array_cls: type[chunkedduckarray[Any, Any]] available: bool = True @abstractmethod @@ -216,10 +201,10 @@ def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: -------- dask.is_dask_collection """ - return isinstance(data, self.array_cls) + return isinstance(data, _chunkedarrayfunction_or_api) @abstractmethod - def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: + def chunks(self, data: chunkedduckarray[Any, Any]) -> _Chunks: """ Return the current chunks of the given array. @@ -245,12 +230,12 @@ def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: @abstractmethod def normalize_chunks( self, - chunks: _Chunks | _NormalizedChunks, - shape: _ShapeType | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, dtype: _DType | None = None, - previous_chunks: _NormalizedChunks | None = None, - ) -> _NormalizedChunks: + previous_chunks: _Chunks | None = None, + ) -> _Chunks: """ Normalize given chunking pattern into an explicit tuple of tuples representation. @@ -281,8 +266,8 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs: Any - ) -> T_ChunkedArray: + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs: Any + ) -> chunkedduckarray[Any, _DType]: """ Create a chunked array from a non-chunked numpy-like array. @@ -307,10 +292,10 @@ def from_array( def rechunk( self, - data: T_ChunkedArray, - chunks: _NormalizedChunks | tuple[int, ...] | _Chunks, + data: chunkedduckarray[Any, _DType], + chunks: _ChunksLike, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Changes the chunking pattern of the given array. @@ -338,8 +323,8 @@ def rechunk( @abstractmethod def compute( - self, *data: T_ChunkedArray | Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[duckarray[Any, _DType], ...]: """ Computes one or more chunked arrays, returning them as eager numpy arrays. @@ -365,7 +350,7 @@ def compute( raise NotImplementedError() @property - def array_api(self) -> Any: + def array_api(self) -> ModuleType: """ Return the array_api namespace following the python array API standard. @@ -382,14 +367,14 @@ def array_api(self) -> Any: def reduction( self, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, keepdims: bool = False, - ) -> T_ChunkedArray: + ) -> chunkedduckarray[Any, _DType]: """ A general version of array reductions along one or more axes. @@ -434,11 +419,11 @@ def scan( func: Callable[..., Any], binop: Callable[..., Any], ident: float, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], axis: int | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, **kwargs: Any, - ) -> T_ChunkedArray: + ) -> chunkedduckarray[Any, _DType]: """ General version of a 1D scan, also known as a cumulative array reduction. @@ -474,10 +459,10 @@ def apply_gufunc( *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DType_co] | None = None, + output_dtypes: Sequence[_DType] | None = None, vectorize: bool | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...]: """ Apply a generalized ufunc or similar python function to arrays. @@ -557,12 +542,12 @@ def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: _DType_co | None = None, - chunks: tuple[int, ...] | None = None, + dtype: _DType | None = None, + chunks: _Chunks | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Map a function across all blocks of a chunked array. @@ -610,7 +595,7 @@ def blockwise( new_axes: dict[Any, int] | None = None, align_arrays: bool = True, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Tensor operation: Generalized inner and outer products. @@ -656,7 +641,7 @@ def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: + ) -> tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]]: """ Unify chunks across a sequence of arrays. @@ -676,7 +661,9 @@ def unify_chunks( def store( self, - sources: T_ChunkedArray | Sequence[T_ChunkedArray], + sources: ( + chunkedduckarray[Any, _DType] | Sequence[chunkedduckarray[Any, _DType]] + ), targets: Any, **kwargs: dict[str, Any], ) -> Any: diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 3ce33d4d8ea..616c55ea5ee 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -2,7 +2,7 @@ from importlib import import_module from types import ModuleType -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, overload import numpy as np from packaging.version import Version @@ -15,7 +15,15 @@ if TYPE_CHECKING: ModType = Literal["dask", "pint", "cupy", "sparse", "cubed", "numbagg"] DuckArrayTypes = tuple[type[Any], ...] # TODO: improve this? maybe Generic - from xarray.namedarray._typing import _DType, _ShapeType, duckarray + from xarray.core.indexing import ExplicitlyIndexed + from xarray.namedarray._typing import ( + _DType, + _dtype, + _generic, + _ShapeType, + chunkedduckarray, + duckarray, + ) class DuckArrayModule: @@ -116,23 +124,58 @@ def to_numpy( data = data.magnitude if isinstance(data, array_type("sparse")): data = data.todense() - data = np.asarray(data) - - return data - - -def to_duck_array(data: Any, **kwargs: dict[str, Any]) -> duckarray[_ShapeType, _DType]: + out = np.asarray(data) + + return out + + +@overload +def to_duck_array( + data: ExplicitlyIndexed, **kwargs: dict[str, Any] +) -> duckarray[Any, _dtype[_generic]]: ... +@overload +def to_duck_array( + data: chunkedduckarray[_ShapeType, _DType], **kwargs: dict[str, Any] +) -> chunkedduckarray[_ShapeType, _DType]: ... +@overload +def to_duck_array( + data: duckarray[_ShapeType, _DType], **kwargs: dict[str, Any] +) -> duckarray[_ShapeType, _DType]: ... + + +# @overload +# def to_duck_array( +# data: np.typing.ArrayLike, **kwargs: dict[str, Any] +# ) -> np.ndarray[Any, np.dtype[np.generic]]: ... +@overload +def to_duck_array( + data: np.typing.ArrayLike, **kwargs: dict[str, Any] +) -> duckarray[Any, _dtype[_generic]]: ... +def to_duck_array( + data: ( + ExplicitlyIndexed + | chunkedduckarray[_ShapeType, _DType] + | duckarray[_ShapeType, _DType] + | np.typing.ArrayLike + ), + **kwargs: dict[str, Any], +) -> ( + duckarray[_ShapeType, _DType] + | duckarray[Any, _dtype[_generic]] + | np.ndarray[Any, np.dtype[np.generic]] +): from xarray.core.indexing import ExplicitlyIndexed from xarray.namedarray.parallelcompat import get_chunked_array_type + if isinstance(data, ExplicitlyIndexed): + return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] + if is_chunked_array(data): chunkmanager = get_chunked_array_type(data) loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated] return loaded_data - if isinstance(data, ExplicitlyIndexed): - return data.get_duck_array() # type: ignore[no-untyped-call, no-any-return] - elif is_duck_array(data): + if is_duck_array(data): return data else: - return np.asarray(data) # type: ignore[return-value] + return np.asarray(data) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d568bdc3268..623e4e9f970 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -44,6 +44,8 @@ FirstElementAccessibleArray, arm_xfail, assert_array_equal, + assert_duckarray_allclose, + assert_duckarray_equal, assert_no_warnings, has_cftime, requires_cftime, @@ -144,13 +146,13 @@ def test_cf_datetime(num_dates, units, calendar) -> None: assert (abs_diff <= np.timedelta64(1, "s")).all() encoded, _, _ = encode_cf_datetime(actual, units, calendar) - assert_array_equal(num_dates, np.round(encoded, 1)) + assert_duckarray_allclose(num_dates, encoded) if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units: # verify that wrapping with a pandas.Index works # note that it *does not* currently work to put # non-datetime64 compatible dates into a pandas.Index encoded, _, _ = encode_cf_datetime(pd.Index(actual), units, calendar) - assert_array_equal(num_dates, np.round(encoded, 1)) + assert_duckarray_allclose(num_dates, encoded) @requires_cftime @@ -893,10 +895,10 @@ def test_time_units_with_timezone_roundtrip(calendar) -> None: ) if calendar in _STANDARD_CALENDARS: - np.testing.assert_array_equal(result_num_dates, expected_num_dates) + assert_duckarray_equal(result_num_dates, expected_num_dates) else: # cftime datetime arithmetic is not quite exact. - np.testing.assert_allclose(result_num_dates, expected_num_dates) + assert_duckarray_allclose(result_num_dates, expected_num_dates) assert result_units == expected_units assert result_calendar == calendar diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index dbe40be710c..1ff1414783b 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -6,8 +6,14 @@ import numpy as np import pytest -from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks -from xarray.namedarray._typing import _Chunks +from xarray.namedarray._typing import ( + _Chunks, + _ChunksLike, + _DType, + _Shape, + chunkedduckarray, + duckarray, +) from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, @@ -27,7 +33,7 @@ class DummyChunkedArray(np.ndarray): https://numpy.org/doc/stable/user/basics.subclassing.html#simple-example-adding-an-extra-attribute-to-ndarray """ - chunks: T_NormalizedChunks + chunks: _Chunks def __new__( cls, @@ -63,32 +69,36 @@ def __init__(self): def is_chunked_array(self, data: Any) -> bool: return isinstance(data, DummyChunkedArray) - def chunks(self, data: DummyChunkedArray) -> T_NormalizedChunks: + def chunks(self, data: chunkedduckarray[Any, Any]) -> _Chunks: return data.chunks def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, - dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + dtype: _DType | None = None, + previous_chunks: _Chunks | None = None, + ) -> _Chunks: from dask.array.core import normalize_chunks return normalize_chunks(chunks, shape, limit, dtype, previous_chunks) def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs - ) -> DummyChunkedArray: + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs + ) -> chunkedduckarray[Any, _DType]: from dask import array as da return da.from_array(data, chunks, **kwargs) - def rechunk(self, data: DummyChunkedArray, chunks, **kwargs) -> DummyChunkedArray: + def rechunk( + self, data: chunkedduckarray[Any, _DType], chunks: _ChunksLike, **kwargs + ) -> chunkedduckarray[Any, _DType]: return data.rechunk(chunks, **kwargs) - def compute(self, *data: DummyChunkedArray, **kwargs) -> tuple[np.ndarray, ...]: + def compute( + self, *data: chunkedduckarray[Any, _DType], **kwargs + ) -> tuple[duckarray[Any, _DType], ...]: from dask.array import compute return compute(*data, **kwargs)