diff --git a/changes/3191.feature.rst b/changes/3191.feature.rst new file mode 100644 index 0000000000..7542eab4f0 --- /dev/null +++ b/changes/3191.feature.rst @@ -0,0 +1 @@ +Added `NDBuffer.empty` method for faster ndbuffer initialization. diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 2f7ced4a74..19125b838f 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -374,6 +374,41 @@ def create( cast("NDArrayLike", None) ) # This line will never be reached, but it satisfies the type checker + @classmethod + def empty( + cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C" + ) -> Self: + """ + Create an empty buffer with the given shape, dtype, and order. + + This method can be faster than ``NDBuffer.create`` because it doesn't + have to initialize the memory used by the underlying ndarray-like + object. + + Parameters + ---------- + shape + The shape of the buffer and its underlying ndarray-like object + dtype + The datatype of the buffer and its underlying ndarray-like object + order + Whether to store multi-dimensional data in row-major (C-style) or + column-major (Fortran-style) order in memory. + + Returns + ------- + buffer + New buffer representing a new ndarray_like object with empty data. + + See Also + -------- + NDBuffer.create + Create a new buffer with some initial fill value. + """ + # Implementations should override this method if they have a faster way + # to allocate an empty buffer. + return cls.create(shape=shape, dtype=dtype, order=order) + @classmethod def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: """Create a new buffer of a ndarray-like object diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 3140d75111..9da0059d0b 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -20,7 +20,7 @@ from typing import Self from zarr.core.buffer.core import ArrayLike, NDArrayLike - from zarr.core.common import BytesLike + from zarr.core.common import BytesLike, ChunkCoords class Buffer(core.Buffer): @@ -160,6 +160,12 @@ def create( else: return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order)) + @classmethod + def empty( + cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C" + ) -> Self: + return cls(np.empty(shape=shape, dtype=dtype, order=order)) + @classmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: return cls.from_ndarray_like(np.asanyarray(array_like)) diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py index 7ea6d53fe3..d46ee6c8e5 100644 --- a/src/zarr/core/buffer/gpu.py +++ b/src/zarr/core/buffer/gpu.py @@ -22,7 +22,7 @@ from collections.abc import Iterable from typing import Self - from zarr.core.common import BytesLike + from zarr.core.common import BytesLike, ChunkCoords try: import cupy as cp @@ -178,6 +178,12 @@ def create( ret.fill(fill_value) return ret + @classmethod + def empty( + cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C" + ) -> Self: + return cls(cp.empty(shape=shape, dtype=dtype, order=order)) + @classmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: """Create a new buffer of Numpy array-like object diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index c3694e268b..8cbfb2414a 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -13,6 +13,8 @@ from collections.abc import Iterable from typing import Self + from zarr.core.common import ChunkCoords + __all__ = [ "NDBufferUsingTestNDArrayLike", @@ -51,6 +53,15 @@ def create( ret.fill(fill_value) return ret + @classmethod + def empty( + cls, + shape: ChunkCoords, + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + ) -> Self: + return super(cpu.NDBuffer, cls).empty(shape=shape, dtype=dtype, order=order) + class StoreExpectingTestBuffer(MemoryStore): """Example of a custom Store that expect MyBuffer for all its non-metadata diff --git a/src/zarr/testing/utils.py b/src/zarr/testing/utils.py index afc15d742c..3cb7f5cb99 100644 --- a/src/zarr/testing/utils.py +++ b/src/zarr/testing/utils.py @@ -40,13 +40,10 @@ def has_cupy() -> bool: T = TypeVar("T") +gpu_mark = pytest.mark.gpu +skip_if_no_gpu = pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available") + + # Decorator for GPU tests def gpu_test(func: T) -> T: - return cast( - "T", - pytest.mark.gpu( - pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")( - func - ) - ), - ) + return cast("T", gpu_mark(skip_if_no_gpu(func))) diff --git a/tests/test_buffer.py b/tests/test_buffer.py index 93b116e908..bbfa25d138 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numpy as np import pytest @@ -20,7 +20,7 @@ TestBuffer, TestNDArrayLike, ) -from zarr.testing.utils import gpu_test +from zarr.testing.utils import gpu_mark, gpu_test, skip_if_no_gpu if TYPE_CHECKING: import types @@ -200,3 +200,39 @@ def test_gpu_buffer_prototype() -> None: def test_cpu_buffer_as_scalar() -> None: buf = cpu.buffer_prototype.nd_buffer.create(shape=(), dtype="int64") assert buf.as_scalar() == buf.as_ndarray_like()[()] # type: ignore[index] + + +@pytest.mark.parametrize( + "prototype", + [ + cpu.buffer_prototype, + pytest.param( + gpu.buffer_prototype, + marks=[gpu_mark, skip_if_no_gpu], + ), + BufferPrototype( + buffer=cpu.Buffer, + nd_buffer=NDBufferUsingTestNDArrayLike, + ), + ], +) +@pytest.mark.parametrize( + "shape", + [ + (1, 2), + (1, 2, 3), + ], +) +@pytest.mark.parametrize("dtype", ["int32", "float64"]) +@pytest.mark.parametrize("order", ["C", "F"]) +def test_empty( + prototype: BufferPrototype, shape: tuple[int, ...], dtype: str, order: Literal["C", "F"] +) -> None: + buf = prototype.nd_buffer.empty(shape=shape, dtype=dtype, order=order) + result = buf.as_ndarray_like() + assert result.shape == shape + assert result.dtype == dtype + if order == "C": + assert result.flags.c_contiguous # type: ignore[attr-defined] + else: + assert result.flags.f_contiguous # type: ignore[attr-defined]