diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4622298e152..cc360a40930 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Callable, Hashable from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, Literal, Union, cast +from typing import TYPE_CHECKING, Literal, Union, cast, overload import numpy as np import pandas as pd @@ -27,8 +27,12 @@ from xarray.core.pdcompat import nanosecond_precision_timestamp from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable -from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray._typing import ( + _chunkedarrayfunction_or_api, + chunkedduckarray, + duckarray, +) +from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.utils import is_duck_dask_array try: @@ -36,9 +40,11 @@ except ImportError: cftime = None -from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, T_DuckArray +from xarray.core.types import CFCalendar, NPDatetimeUnitOptions T_Name = Union[Hashable, None] +PandasTypes = Union[pd.Index, pd.DatetimeIndex] + # standard calendars recognized by cftime _STANDARD_CALENDARS = {"standard", "gregorian", "proleptic_gregorian"} @@ -716,12 +722,26 @@ def _cast_to_dtype_if_safe(num: np.ndarray, dtype: np.dtype) -> np.ndarray: return cast_num +@overload +def encode_cf_datetime( + dates: chunkedduckarray, + units: str | None = None, + calendar: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray, str, str]: ... +@overload +def encode_cf_datetime( + dates: duckarray | PandasTypes, + units: str | None = None, + calendar: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[duckarray, str, str]: ... def encode_cf_datetime( - dates: T_DuckArray, # type: ignore[misc] + dates: duckarray | PandasTypes | chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_DuckArray, str, str]: +) -> tuple[duckarray | PandasTypes | chunkedduckarray, str, str]: """Given an array of datetime objects, returns the tuple `(num, units, calendar)` suitable for a CF compliant time variable. 
@@ -732,19 +752,19 @@ def encode_cf_datetime( cftime.date2num """ dates = asarray(dates) - if is_chunked_array(dates): + if isinstance(dates, _chunkedarrayfunction_or_api): return _lazily_encode_cf_datetime(dates, units, calendar, dtype) else: return _eagerly_encode_cf_datetime(dates, units, calendar, dtype) def _eagerly_encode_cf_datetime( - dates: T_DuckArray, # type: ignore[misc] + dates: duckarray | PandasTypes, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[T_DuckArray, str, str]: +) -> tuple[duckarray | PandasTypes, str, str]: dates = asarray(dates) data_units = infer_datetime_units(dates) @@ -822,11 +842,11 @@ def _eagerly_encode_cf_datetime( def _encode_cf_datetime_within_map_blocks( - dates: T_DuckArray, # type: ignore[misc] + dates: duckarray, units: str, calendar: str, dtype: np.dtype, -) -> T_DuckArray: +) -> duckarray | PandasTypes: num, *_ = _eagerly_encode_cf_datetime( dates, units, calendar, dtype, allow_units_modification=False ) @@ -834,11 +854,11 @@ def _encode_cf_datetime_within_map_blocks( def _lazily_encode_cf_datetime( - dates: T_ChunkedArray, + dates: chunkedduckarray, units: str | None = None, calendar: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_ChunkedArray, str, str]: +) -> tuple[chunkedduckarray, str, str]: if calendar is None: # This will only trigger minor compute if dates is an object dtype array. calendar = infer_calendar_name(dates) @@ -871,24 +891,36 @@ def _lazily_encode_cf_datetime( return num, units, calendar +@overload def encode_cf_timedelta( - timedeltas: T_DuckArray, # type: ignore[misc] + timedeltas: chunkedduckarray, units: str | None = None, dtype: np.dtype | None = None, -) -> tuple[T_DuckArray, str]: +) -> tuple[chunkedduckarray, str]: ... +@overload +def encode_cf_timedelta( + timedeltas: duckarray | PandasTypes, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[duckarray, str]: ... 
+def encode_cf_timedelta( + timedeltas: chunkedduckarray | duckarray | PandasTypes, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray | duckarray, str]: timedeltas = asarray(timedeltas) - if is_chunked_array(timedeltas): + if isinstance(timedeltas, _chunkedarrayfunction_or_api): return _lazily_encode_cf_timedelta(timedeltas, units, dtype) else: return _eagerly_encode_cf_timedelta(timedeltas, units, dtype) def _eagerly_encode_cf_timedelta( - timedeltas: T_DuckArray, # type: ignore[misc] + timedeltas: duckarray, units: str | None = None, dtype: np.dtype | None = None, allow_units_modification: bool = True, -) -> tuple[T_DuckArray, str]: +) -> tuple[duckarray, str]: data_units = infer_timedelta_units(timedeltas) if units is None: @@ -936,10 +968,10 @@ def _eagerly_encode_cf_timedelta( def _encode_cf_timedelta_within_map_blocks( - timedeltas: T_DuckArray, # type: ignore[misc] + timedeltas: duckarray, units: str, dtype: np.dtype, -) -> T_DuckArray: +) -> duckarray: num, _ = _eagerly_encode_cf_timedelta( timedeltas, units, dtype, allow_units_modification=False ) @@ -947,8 +979,10 @@ def _encode_cf_timedelta_within_map_blocks( def _lazily_encode_cf_timedelta( - timedeltas: T_ChunkedArray, units: str | None = None, dtype: np.dtype | None = None -) -> tuple[T_ChunkedArray, str]: + timedeltas: chunkedduckarray, + units: str | None = None, + dtype: np.dtype | None = None, +) -> tuple[chunkedduckarray, str]: if units is None and dtype is None: units = "nanoseconds" dtype = np.dtype("int64") diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 8154f044332..f34c1cae1d3 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -179,7 +179,7 @@ def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): if is_chunked_array(array): chunkmanager = get_chunked_array_type(array) - return chunkmanager.map_blocks(func, array, dtype=dtype) # type: ignore[arg-type] + return chunkmanager.map_blocks(func, array, dtype=dtype) else: return _ElementwiseFunctionArray(array, func, dtype) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6e233425e95..238240d0aa0 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -34,6 +34,7 @@ from xarray.core.types import Dims, T_DataArray from xarray.core.utils import is_dict_like, is_scalar, parse_dims_as_set, result_name from xarray.core.variable import Variable +from xarray.namedarray._typing import chunkedduckarray from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array from xarray.util.deprecation_helpers import deprecate_dims @@ -793,6 +794,7 @@ def apply_variable_ufunc( ) def func(*arrays): + res: chunkedduckarray | tuple[chunkedduckarray, ...] res = chunkmanager.apply_gufunc( numpy_func, signature.to_gufunc_string(exclude_dims), diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ea17a69f827..25c555e296d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -174,6 +174,7 @@ ) from xarray.core.weighted import DatasetWeighted from xarray.groupers import Grouper, Resampler + from xarray.namedarray._typing import chunkedduckarray, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -897,7 +898,7 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data: tuple[np.ndarray[Any, Any], ...] 
= chunkmanager.compute( + evaluated_data: tuple[duckarray[Any, Any], ...] = chunkmanager.compute( *lazy_data.values(), **kwargs ) @@ -1082,6 +1083,7 @@ def _persist_inplace(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*lazy_data.values()) # evaluate all the dask arrays simultaneously + evaluated_data: tuple[chunkedduckarray, ...] evaluated_data = chunkmanager.persist(*lazy_data.values(), **kwargs) for k, data in zip(lazy_data, evaluated_data, strict=False): diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index ee90cf7477c..aaaf2159c66 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -74,6 +74,7 @@ T_ChunksFreq, ZarrWriteModes, ) + from xarray.namedarray._typing import chunkedduckarray, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint # """ @@ -1954,9 +1955,8 @@ def load(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*flat_lazy_data.values()) # evaluate all the chunked arrays simultaneously - evaluated_data: tuple[np.ndarray[Any, Any], ...] = chunkmanager.compute( - *flat_lazy_data.values(), **kwargs - ) + evaluated_data: tuple[duckarray[Any, Any], ...] + evaluated_data = chunkmanager.compute(*flat_lazy_data.values(), **kwargs) for (path, var_name), data in zip( flat_lazy_data, evaluated_data, strict=False @@ -2018,6 +2018,7 @@ def _persist_inplace(self, **kwargs) -> Self: chunkmanager = get_chunked_array_type(*flat_lazy_data.values()) # evaluate all the dask arrays simultaneously + evaluated_data: tuple[chunkedduckarray[Any, Any], ...] evaluated_data = chunkmanager.persist(*flat_lazy_data.values(), **kwargs) for (path, var_name), data in zip( diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 0a7b94a53c7..7c15ee37e25 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1329,7 +1329,7 @@ def _arrayize_vectorized_indexer( def _chunked_array_with_chunks_hint( - array, chunks, chunkmanager: ChunkManagerEntrypoint[Any] + array, chunks, chunkmanager: ChunkManagerEntrypoint ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 07113d66b5b..51f244eb3bc 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2600,7 +2600,7 @@ def chunk( # type: ignore[override] name: str | None = None, lock: bool | None = None, inline_array: bool | None = None, - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 95e7d7adfc3..567209f68ca 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -75,11 +75,16 @@ def dtype(self) -> _DType_co: ... _Axes = tuple[_Axis, ...] _AxisLike = Union[_Axis, _Axes] -_Chunks = tuple[_Shape, ...] -_NormalizedChunks = tuple[tuple[int, ...], ...] +_Chunk = tuple[int, ...] +_Chunks = tuple[_Chunk, ...] +_ChunksLike = Union[ + int, Literal["auto"], None, _Chunk, _Chunks +] # TODO: Literal["auto"] +_ChunksType = TypeVar("_ChunksType", bound=_Chunks) + # FYI in some cases we don't allow `None`, which this doesn't take account of. # # FYI the `str` is for a size string, e.g. "16MB", supported by dask. -T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...] 
+T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | _Chunk # We allow the tuple form of this (though arguably we could transition to named dims only) T_Chunks: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDim] @@ -238,7 +243,7 @@ def chunks(self) -> _Chunks: ... @runtime_checkable class _chunkedarrayfunction( - _arrayfunction[_ShapeType_co, _DType_co], Protocol[_ShapeType_co, _DType_co] + _arrayfunction[_ShapeType, _DType_co], Protocol[_ShapeType, _DType_co] ): """ Chunked duck array supporting NEP 18. @@ -249,6 +254,11 @@ class _chunkedarrayfunction( @property def chunks(self) -> _Chunks: ... + def rechunk( + self, + chunks: _ChunksLike, + ) -> _chunkedarrayfunction[_ShapeType, _DType_co]: ... + @runtime_checkable class _chunkedarrayapi( @@ -263,6 +273,11 @@ class _chunkedarrayapi( @property def chunks(self) -> _Chunks: ... + def rechunk( + self, + chunks: _ChunksLike, + ) -> _chunkedarrayapi[_ShapeType_co, _DType_co]: ... + # NamedArray can most likely use both __array_function__ and __array_namespace__: _chunkedarrayfunction_or_api = (_chunkedarrayfunction, _chunkedarrayapi) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 683a1266472..4036ecef35c 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -756,8 +756,8 @@ def sizes(self) -> dict[_Dim, _IntOrUnknown]: def chunk( self, - chunks: T_Chunks = {}, # noqa: B006 # even though it's unsafe, it is being used intentionally here (#4667) - chunked_array_type: str | ChunkManagerEntrypoint[Any] | None = None, + chunks: T_Chunks = {}, # noqa: B006 # even though it's unsafe, it is being used intentionally here (#4667) + chunked_array_type: str | ChunkManagerEntrypoint | None = None, from_array_kwargs: Any = None, **chunks_kwargs: Any, ) -> Self: @@ -831,6 +831,7 @@ def chunk( chunkmanager = guess_chunkmanager(chunked_array_type) data_old = self._data + data_chunked: _chunkedarray[Any, _DType_co] if chunkmanager.is_chunked_array(data_old): data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type] else: diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py index 6485ba375f5..158f40e34f2 100644 --- a/xarray/namedarray/daskmanager.py +++ b/xarray/namedarray/daskmanager.py @@ -1,113 +1,121 @@ from __future__ import annotations from collections.abc import Callable, Iterable, Sequence +from types import ModuleType from typing import TYPE_CHECKING, Any import numpy as np from xarray.core.indexing import ImplicitToExplicitIndexingAdapter -from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, T_ChunkedArray +from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint from xarray.namedarray.utils import is_duck_dask_array, module_available if TYPE_CHECKING: from xarray.namedarray._typing import ( - T_Chunks, - _DType_co, - _NormalizedChunks, + _Chunks, + _ChunksLike, + _DType, + _dtype, + _Shape, + chunkedduckarray, duckarray, ) - try: - from dask.array import Array as DaskArray - except ImportError: - DaskArray = np.ndarray[Any, Any] - dask_available = module_available("dask") -class DaskManager(ChunkManagerEntrypoint["DaskArray"]): - array_cls: type[DaskArray] +class DaskManager(ChunkManagerEntrypoint): available: bool = dask_available def __init__(self) -> None: # TODO can we replace this with a class attribute instead? 
- from dask.array import Array + from dask.array.core import Array - self.array_cls = Array + # TODO: error: Incompatible types in assignment (expression has type "type[Array]", variable has type "type[_chunkedarrayfunction[Any, Any]] | type[_chunkedarrayapi[Any, Any]]") [assignment] + self.array_cls = Array # type: ignore[assignment] def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: return is_duck_dask_array(data) - def chunks(self, data: Any) -> _NormalizedChunks: - return data.chunks # type: ignore[no-any-return] + def chunks(self, data: chunkedduckarray[Any, _dtype[Any]]) -> _Chunks: + return data.chunks def normalize_chunks( self, - chunks: T_Chunks | _NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, - dtype: _DType_co | None = None, - previous_chunks: _NormalizedChunks | None = None, - ) -> Any: + dtype: _dtype[Any] | None = None, + previous_chunks: _Chunks | None = None, + ) -> _Chunks: """Called by open_dataset""" from dask.array.core import normalize_chunks - return normalize_chunks( + out: _Chunks + out = normalize_chunks( chunks, shape=shape, limit=limit, dtype=dtype, previous_chunks=previous_chunks, ) # type: ignore[no-untyped-call] + return out def from_array( - self, data: Any, chunks: T_Chunks | _NormalizedChunks, **kwargs: Any - ) -> DaskArray | Any: - import dask.array as da + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs: Any + ) -> chunkedduckarray[Any, _DType]: + from dask.array.core import from_array if isinstance(data, ImplicitToExplicitIndexingAdapter): # lazily loaded backend array classes should use NumPy array operations. kwargs["meta"] = np.ndarray - return da.from_array( + out: chunkedduckarray[Any, _DType] + out = from_array( data, chunks, **kwargs, ) # type: ignore[no-untyped-call] + return out def compute( - self, *data: Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[duckarray[Any, _DType], ...]: from dask.array import compute - return compute(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] + out: tuple[duckarray[Any, _DType], ...] 
+ out = compute(*data, **kwargs) # type: ignore[no-untyped-call] + return out - def persist(self, *data: Any, **kwargs: Any) -> tuple[DaskArray | Any, ...]: + def persist( + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[chunkedduckarray[Any, _DType] | Any, ...]: from dask import persist return persist(*data, **kwargs) # type: ignore[no-untyped-call, no-any-return] @property - def array_api(self) -> Any: + def array_api(self) -> ModuleType: from dask import array as da return da def reduction( self, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, keepdims: bool = False, - ) -> DaskArray | Any: - from dask.array import reduction + ) -> chunkedduckarray[Any, _DType]: + from dask.array.reductions import reduction - return reduction( + out: chunkedduckarray[Any, _DType] + out = reduction( arr, chunk=func, combine=combine_func, @@ -116,20 +124,22 @@ def reduction( dtype=dtype, keepdims=keepdims, ) # type: ignore[no-untyped-call] + return out def scan( self, func: Callable[..., Any], binop: Callable[..., Any], ident: float, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], axis: int | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, **kwargs: Any, - ) -> DaskArray | Any: + ) -> chunkedduckarray[Any, _DType]: from dask.array.reductions import cumreduction - return cumreduction( + out: chunkedduckarray[Any, _DType] + out = cumreduction( func, binop, ident, @@ -138,6 +148,7 @@ def scan( dtype=dtype, **kwargs, ) # type: ignore[no-untyped-call] + return out def apply_gufunc( self, @@ -145,18 +156,19 @@ def apply_gufunc( signature: str, *args: Any, axes: Sequence[tuple[int, ...]] | None = None, - axis: int | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DType_co] | None = None, - output_sizes: dict[str, int] | None = None, + output_dtypes: Sequence[_DType] | None = None, vectorize: bool | None = None, + axis: int | None = None, + output_sizes: dict[str, int] | None = None, allow_rechunk: bool = False, - meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + meta: tuple[np.ndarray[Any, np.dtype[np.generic]], ...] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...]: from dask.array.gufunc import apply_gufunc - return apply_gufunc( + out: chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...] + out = apply_gufunc( func, signature, *args, @@ -171,20 +183,23 @@ def apply_gufunc( **kwargs, ) # type: ignore[no-untyped-call] + return out + def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: _DType_co | None = None, - chunks: tuple[int, ...] 
| None = None, + dtype: _DType | None = None, + chunks: _Chunks | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: from dask.array import map_blocks # pass through name, meta, token as kwargs - return map_blocks( + out: chunkedduckarray[Any, _DType] + out = map_blocks( func, *args, dtype=dtype, @@ -193,26 +208,27 @@ def map_blocks( new_axis=new_axis, **kwargs, ) # type: ignore[no-untyped-call] + return out def blockwise( self, func: Callable[..., Any], out_ind: Iterable[Any], - *args: Any, - # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types - name: str | None = None, - token: Any | None = None, - dtype: _DType_co | None = None, + *args: Any, # can't type this as mypy assumes args are all same type, but dask blockwise args alternate types adjust_chunks: dict[Any, Callable[..., Any]] | None = None, new_axes: dict[Any, int] | None = None, align_arrays: bool = True, + name: str | None = None, + token: Any | None = None, + dtype: _DType | None = None, concatenate: bool | None = None, - meta: tuple[np.ndarray[Any, _DType_co], ...] | None = None, + meta: tuple[np.ndarray[Any, np.dtype[np.generic]], ...] | None = None, **kwargs: Any, - ) -> DaskArray | Any: - from dask.array import blockwise + ) -> chunkedduckarray[Any, _DType]: + from dask.array.blockwise import blockwise - return blockwise( + out: chunkedduckarray[Any, _DType] + out = blockwise( func, out_ind, *args, @@ -226,15 +242,18 @@ def blockwise( meta=meta, **kwargs, ) # type: ignore[no-untyped-call] + return out def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> tuple[dict[str, _NormalizedChunks], list[DaskArray]]: + ) -> tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]]: from dask.array.core import unify_chunks - return unify_chunks(*args, **kwargs) # type: ignore[no-any-return, no-untyped-call] + out: tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]] + out = unify_chunks(*args, **kwargs) # type: ignore[no-untyped-call] + return out def store( self, @@ -242,7 +261,7 @@ def store( targets: Any, **kwargs: Any, ) -> Any: - from dask.array import store + from dask.array.core import store return store( sources=sources, diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py index 69dd4ab5f93..a3945d3fa0f 100644 --- a/xarray/namedarray/parallelcompat.py +++ b/xarray/namedarray/parallelcompat.py @@ -10,45 +10,29 @@ from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence from importlib.metadata import EntryPoint, entry_points -from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar +from types import ModuleType +from typing import TYPE_CHECKING, Any import numpy as np from xarray.core.options import OPTIONS from xarray.core.utils import emit_user_level_warning -from xarray.namedarray.pycompat import is_chunked_array +from xarray.namedarray._typing import _chunkedarrayfunction_or_api if TYPE_CHECKING: from xarray.namedarray._typing import ( T_Chunks, _Chunks, + _ChunksLike, _DType, - _DType_co, - _NormalizedChunks, - _ShapeType, + _Shape, + chunkedduckarray, duckarray, ) -class ChunkedArrayMixinProtocol(Protocol): - def rechunk(self, chunks: Any, **kwargs: Any) -> Any: ... - - @property - def dtype(self) -> np.dtype[Any]: ... 
- - @property - def chunks(self) -> _NormalizedChunks: ... - - def compute( - self, *data: Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: ... - - -T_ChunkedArray = TypeVar("T_ChunkedArray", bound=ChunkedArrayMixinProtocol) - - @functools.lru_cache(maxsize=1) -def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: +def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint]: """ Return a dictionary of available chunk managers and their ChunkManagerEntrypoint subclass objects. @@ -65,7 +49,7 @@ def list_chunkmanagers() -> dict[str, ChunkManagerEntrypoint[Any]]: def load_chunkmanagers( entrypoints: Sequence[EntryPoint], -) -> dict[str, ChunkManagerEntrypoint[Any]]: +) -> dict[str, ChunkManagerEntrypoint]: """Load entrypoints and instantiate chunkmanagers only once.""" loaded_entrypoints = {} @@ -86,8 +70,8 @@ def load_chunkmanagers( def guess_chunkmanager( - manager: str | ChunkManagerEntrypoint[Any] | None, -) -> ChunkManagerEntrypoint[Any]: + manager: str | ChunkManagerEntrypoint | None, +) -> ChunkManagerEntrypoint: """ Get namespace of chunk-handling methods, guessing from what's available. @@ -121,7 +105,7 @@ def guess_chunkmanager( ) -def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: +def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint: """ Detects which parallel backend should be used for given set of arrays. @@ -134,7 +118,8 @@ def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: chunked_arrays = [ a for a in args - if is_chunked_array(a) and type(a) not in ALLOWED_NON_CHUNKED_TYPES + if isinstance(a, _chunkedarrayfunction_or_api) + and type(a) not in ALLOWED_NON_CHUNKED_TYPES ] # Asserts all arrays are the same type (or numpy etc.) @@ -164,7 +149,7 @@ def get_chunked_array_type(*args: Any) -> ChunkManagerEntrypoint[Any]: return selected[0] -class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): +class ChunkManagerEntrypoint(ABC): """ Interface between a particular parallel computing framework and xarray. @@ -183,7 +168,7 @@ class ChunkManagerEntrypoint(ABC, Generic[T_ChunkedArray]): This attribute is used for array instance type checking at runtime. """ - array_cls: type[T_ChunkedArray] + array_cls: type[chunkedduckarray[Any, Any]] available: bool = True @abstractmethod @@ -209,10 +194,10 @@ def is_chunked_array(self, data: duckarray[Any, Any]) -> bool: -------- dask.is_dask_collection """ - return isinstance(data, self.array_cls) + return isinstance(data, _chunkedarrayfunction_or_api) @abstractmethod - def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: + def chunks(self, data: chunkedduckarray[Any, Any]) -> _Chunks: """ Return the current chunks of the given array. @@ -238,12 +223,12 @@ def chunks(self, data: T_ChunkedArray) -> _NormalizedChunks: @abstractmethod def normalize_chunks( self, - chunks: _Chunks | _NormalizedChunks, - shape: _ShapeType | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, dtype: _DType | None = None, - previous_chunks: _NormalizedChunks | None = None, - ) -> _NormalizedChunks: + previous_chunks: _Chunks | None = None, + ) -> _Chunks: """ Normalize given chunking pattern into an explicit tuple of tuples representation. 
@@ -274,8 +259,8 @@ def normalize_chunks( @abstractmethod def from_array( - self, data: duckarray[Any, Any], chunks: _Chunks, **kwargs: Any - ) -> T_ChunkedArray: + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs: Any + ) -> chunkedduckarray[Any, _DType]: """ Create a chunked array from a non-chunked numpy-like array. @@ -300,10 +285,10 @@ def from_array( def rechunk( self, - data: T_ChunkedArray, - chunks: _NormalizedChunks | tuple[int, ...] | _Chunks, + data: chunkedduckarray[Any, _DType], + chunks: _ChunksLike, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Changes the chunking pattern of the given array. @@ -331,8 +316,8 @@ def rechunk( @abstractmethod def compute( - self, *data: T_ChunkedArray | Any, **kwargs: Any - ) -> tuple[np.ndarray[Any, _DType_co], ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[duckarray[Any, _DType], ...]: """ Computes one or more chunked arrays, returning them as eager numpy arrays. @@ -363,8 +348,8 @@ def shuffle( raise NotImplementedError() def persist( - self, *data: T_ChunkedArray | Any, **kwargs: Any - ) -> tuple[T_ChunkedArray | Any, ...]: + self, *data: chunkedduckarray[Any, _DType] | Any, **kwargs: Any + ) -> tuple[chunkedduckarray[Any, _DType], ...]: """ Persist one or more chunked arrays in memory. @@ -386,7 +371,7 @@ def persist( raise NotImplementedError() @property - def array_api(self) -> Any: + def array_api(self) -> ModuleType: """ Return the array_api namespace following the python array API standard. @@ -403,14 +388,14 @@ def array_api(self) -> Any: def reduction( self, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], func: Callable[..., Any], combine_func: Callable[..., Any] | None = None, aggregate_func: Callable[..., Any] | None = None, axis: int | Sequence[int] | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, keepdims: bool = False, - ) -> T_ChunkedArray: + ) -> chunkedduckarray[Any, _DType]: """ A general version of array reductions along one or more axes. @@ -455,11 +440,11 @@ def scan( func: Callable[..., Any], binop: Callable[..., Any], ident: float, - arr: T_ChunkedArray, + arr: chunkedduckarray[Any, _DType], axis: int | None = None, - dtype: _DType_co | None = None, + dtype: _DType | None = None, **kwargs: Any, - ) -> T_ChunkedArray: + ) -> chunkedduckarray[Any, _DType]: """ General version of a 1D scan, also known as a cumulative array reduction. @@ -495,10 +480,10 @@ def apply_gufunc( *args: Any, axes: Sequence[tuple[int, ...]] | None = None, keepdims: bool = False, - output_dtypes: Sequence[_DType_co] | None = None, + output_dtypes: Sequence[_DType] | None = None, vectorize: bool | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType] | tuple[chunkedduckarray[Any, _DType], ...]: """ Apply a generalized ufunc or similar python function to arrays. @@ -578,12 +563,12 @@ def map_blocks( self, func: Callable[..., Any], *args: Any, - dtype: _DType_co | None = None, - chunks: tuple[int, ...] | None = None, + dtype: _DType | None = None, + chunks: _Chunks | None = None, drop_axis: int | Sequence[int] | None = None, new_axis: int | Sequence[int] | None = None, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Map a function across all blocks of a chunked array. 
@@ -631,7 +616,7 @@ def blockwise( new_axes: dict[Any, int] | None = None, align_arrays: bool = True, **kwargs: Any, - ) -> Any: + ) -> chunkedduckarray[Any, _DType]: """ Tensor operation: Generalized inner and outer products. @@ -677,7 +662,7 @@ def unify_chunks( self, *args: Any, # can't type this as mypy assumes args are all same type, but dask unify_chunks args alternate types **kwargs: Any, - ) -> tuple[dict[str, _NormalizedChunks], list[T_ChunkedArray]]: + ) -> tuple[dict[str, _Chunks], list[chunkedduckarray[Any, Any]]]: """ Unify chunks across a sequence of arrays. @@ -697,7 +682,9 @@ def unify_chunks( def store( self, - sources: T_ChunkedArray | Sequence[T_ChunkedArray], + sources: ( + chunkedduckarray[Any, _DType] | Sequence[chunkedduckarray[Any, _DType]] + ), targets: Any, **kwargs: dict[str, Any], ) -> Any: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9a51ca40d07..1276ce20b91 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -123,7 +123,7 @@ def _all_cftime_date_types(): @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") @pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) -def test_cf_datetime(num_dates, units, calendar) -> None: +def test_cf_datetime(num_dates, units: str, calendar: str) -> None: import cftime expected = cftime.num2date( diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 7bd2c3bec06..8c157bcb035 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -118,6 +118,12 @@ def check_duck_array_typevar(a: duckarray[Any, _DType]) -> duckarray[Any, _DType ) +def test_duckarray___array__() -> None: + x: duckarray[Any, Any] = np.array([1, 2, 3], dtype=np.int64) + y = np.array(x) + np.testing.assert_array_equal(y, x) + + class NamedArraySubclassobjects: @pytest.fixture def target(self, data: np.ndarray[Any, Any]) -> Any: @@ -389,6 +395,13 @@ def test_duck_array_class_array_api(self) -> None: arrayapi_a = nxp.asarray([2.1, 4], dtype=nxp.int64) check_duck_array_typevar(arrayapi_a) + @pytest.mark.xfail(reason="pd.Index does not include an __array_function__") + def test_pd_index_duckarray(self) -> None: + import pandas as pd + + a: duckarray[Any, Any] = pd.Index([]) + check_duck_array_typevar(a) + def test_new_namedarray(self) -> None: dtype_float = np.dtype(np.float32) narr_float: NamedArray[Any, np.dtype[np.float32]] diff --git a/xarray/tests/test_parallelcompat.py b/xarray/tests/test_parallelcompat.py index 67c68aac534..b6e4dd6b886 100644 --- a/xarray/tests/test_parallelcompat.py +++ b/xarray/tests/test_parallelcompat.py @@ -7,8 +7,14 @@ import pytest from xarray import set_options -from xarray.core.types import T_Chunks, T_DuckArray, T_NormalizedChunks -from xarray.namedarray._typing import _Chunks +from xarray.namedarray._typing import ( + _Chunks, + _ChunksLike, + _DType, + _Shape, + chunkedduckarray, + duckarray, +) from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import ( ChunkManagerEntrypoint, @@ -28,7 +34,7 @@ class DummyChunkedArray(np.ndarray): https://numpy.org/doc/stable/user/basics.subclassing.html#simple-example-adding-an-extra-attribute-to-ndarray """ - chunks: T_NormalizedChunks + chunks: _Chunks def __new__( cls, @@ -64,32 +70,36 @@ def __init__(self): def is_chunked_array(self, data: Any) -> bool: return isinstance(data, DummyChunkedArray) - def 
chunks(self, data: DummyChunkedArray) -> T_NormalizedChunks: + def chunks(self, data: chunkedduckarray[Any, Any]) -> _Chunks: return data.chunks def normalize_chunks( self, - chunks: T_Chunks | T_NormalizedChunks, - shape: tuple[int, ...] | None = None, + chunks: _ChunksLike, + shape: _Shape | None = None, limit: int | None = None, - dtype: np.dtype | None = None, - previous_chunks: T_NormalizedChunks | None = None, - ) -> T_NormalizedChunks: + dtype: _DType | None = None, + previous_chunks: _Chunks | None = None, + ) -> _Chunks: from dask.array.core import normalize_chunks return normalize_chunks(chunks, shape, limit, dtype, previous_chunks) def from_array( - self, data: T_DuckArray | np.typing.ArrayLike, chunks: _Chunks, **kwargs - ) -> DummyChunkedArray: + self, data: duckarray[Any, _DType], chunks: _ChunksLike, **kwargs + ) -> chunkedduckarray[Any, _DType]: from dask import array as da return da.from_array(data, chunks, **kwargs) - def rechunk(self, data: DummyChunkedArray, chunks, **kwargs) -> DummyChunkedArray: + def rechunk( + self, data: chunkedduckarray[Any, _DType], chunks: _ChunksLike, **kwargs + ) -> chunkedduckarray[Any, _DType]: return data.rechunk(chunks, **kwargs) - def compute(self, *data: DummyChunkedArray, **kwargs) -> tuple[np.ndarray, ...]: + def compute( + self, *data: chunkedduckarray[Any, _DType], **kwargs + ) -> tuple[duckarray[Any, _DType], ...]: from dask.array import compute return compute(*data, **kwargs)
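
A few usage sketches to make the typing changes concrete; none of this code is in the diff, and all of it assumes dask is installed. First, the `@overload` pair on `encode_cf_datetime` lets type checkers propagate chunkedness through encoding: a chunked input resolves to the `chunkedduckarray` overload and stays lazy, an eager input to the `duckarray` one. On the lazy path, `units` and `dtype` are left unspecified together here, since the lazy encoder requires both to be prescribed or both omitted.

```python
# Illustration only (not part of the patch): the overloads on
# encode_cf_datetime mean a chunked input stays chunked and an eager
# input is encoded immediately. Assumes dask is installed.
import dask.array as da
import numpy as np
import pandas as pd

from xarray.coding.times import encode_cf_datetime

dates = pd.date_range("2000-01-01", periods=4)

# Eager path: matches the duckarray overload; `num` is an in-memory array.
num, units, calendar = encode_cf_datetime(dates, units="days since 2000-01-01")
print(num, units, calendar)

# Lazy path: matches the chunkedduckarray overload; encoding is deferred
# via map_blocks, so `num_lazy` is still a dask array until computed.
lazy = da.from_array(np.asarray(dates), chunks=2)
num_lazy, units_lazy, calendar_lazy = encode_cf_datetime(lazy)
print(type(num_lazy).__name__, units_lazy, calendar_lazy)
```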
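Second, the recurring swap of `is_chunked_array(x)` for `isinstance(x, _chunkedarrayfunction_or_api)` works because the protocols are `@runtime_checkable`: `isinstance` only probes for the declared attribute surface (`chunks`, the newly added `rechunk`, plus the NEP 18 / array API hooks), so any library's chunked duck array qualifies without xarray importing that library. A minimal check:

```python
# Sketch (assumes dask): the runtime protocol check replaces the old
# is_chunked_array helper. dask arrays expose .chunks and .rechunk, so
# they satisfy _chunkedarrayfunction; plain numpy arrays do not.
import dask.array as da
import numpy as np

from xarray.namedarray._typing import _chunkedarrayfunction_or_api

x = da.ones((4, 4), chunks=2)
print(isinstance(x, _chunkedarrayfunction_or_api))                # True
print(isinstance(np.ones((4, 4)), _chunkedarrayfunction_or_api))  # False
```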
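Third, the new `_ChunksLike` alias spells out the inputs that `normalize_chunks`, `from_array`, and `rechunk` accept, while `_Chunks` is the fully explicit tuple-of-tuples form they return (replacing the old `_NormalizedChunks`). A quick sketch of the normalization through `DaskManager`:

```python
# Sketch (assumes dask): every _ChunksLike spelling normalizes to the
# explicit _Chunks (tuple-of-tuples) form. chunks=None is part of the
# alias but dask's normalize_chunks rejects it, so it is skipped here.
import numpy as np

from xarray.namedarray.daskmanager import DaskManager

manager = DaskManager()
shape, dtype = (10,), np.dtype("int64")
for chunks in (5, "auto", (5,), ((4, 6),)):
    print(repr(chunks), "->", manager.normalize_chunks(chunks, shape=shape, dtype=dtype))
# e.g. 5 -> ((5, 5),) and ((4, 6),) -> ((4, 6),)
```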
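Finally, since `ChunkManagerEntrypoint` is no longer `Generic[T_ChunkedArray]`, annotations throughout drop the `[Any]` parameter; the methods now speak in terms of `chunkedduckarray`/`duckarray` directly. Call sites simplify accordingly:

```python
# Sketch (assumes dask): ChunkManagerEntrypoint is a plain ABC now, so
# the bare name is the annotation; guess_chunkmanager still returns the
# registered manager instance for the given backend name.
from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint, guess_chunkmanager

manager: ChunkManagerEntrypoint = guess_chunkmanager("dask")
print(type(manager).__name__)  # DaskManager
```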