diff --git a/changes/3170.bugfix.rst b/changes/3170.bugfix.rst new file mode 100644 index 0000000000..856e8356bb --- /dev/null +++ b/changes/3170.bugfix.rst @@ -0,0 +1,6 @@ +Fixes a variety of issues related to string data types. + +- Brings the ``VariableLengthUTF8`` data type Zarr V3 identifier in alignment with Zarr Python 3.0.8 +- Disallows creation of 0-length fixed-length data types +- Adds a regression test for the ``VariableLengthUTF8`` data type that checks against version 3.0.8 +- Allows users to request the ``VariableLengthUTF8`` data type with ``str``, ``"str"``, or ``"string"``. diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 735690d4bc..2f875ec491 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -109,6 +109,12 @@ VariableLengthBytes, ) +# These are aliases for variable-length UTF-8 strings +# We handle them when a user requests a data type instead of using NumPy's dtype inference because +# the default NumPy behavior -- to inspect the user-provided array data and choose +# an appropriately sized U dtype -- is unworkable for Zarr. +VLEN_UTF8_ALIAS: Final = ("str", str, "string") + # This type models inputs that can be coerced to a ZDType ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | Mapping[str, JSON] | str @@ -157,6 +163,10 @@ def parse_data_type( # dict and zarr_format 3 means that we have a JSON object representation of the dtype if zarr_format == 3 and isinstance(dtype_spec, Mapping): return get_data_type_from_json(dtype_spec, zarr_format=3) + if dtype_spec in VLEN_UTF8_ALIAS: + # If the dtype request is one of the aliases for variable-length UTF-8 strings, + # return that dtype. 
+ return VariableLengthUTF8() # type: ignore[return-value] # otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case # we can create a numpy dtype from it, and do the dtype inference from that return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type] diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index 25f44a9658..b2f184b2fa 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -37,6 +37,14 @@ class NullTerminatedBytes(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLengt dtype_cls = np.dtypes.BytesDType _zarr_v3_name: ClassVar[Literal["null_terminated_bytes"]] = "null_terminated_bytes" + def __post_init__(self) -> None: + """ + We don't allow instances of this class with length less than 1 because there is no way such + a data type can contain actual data. + """ + if self.length < 1: + raise ValueError(f"length must be >= 1, got {self.length}.") + @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: if cls._check_native_dtype(dtype): @@ -155,6 +163,14 @@ class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name: ClassVar[Literal["raw_bytes"]] = "raw_bytes" + def __post_init__(self) -> None: + """ + We don't allow instances of this class with length less than 1 because there is no way such + a data type can contain actual data. 
+ """ + if self.length < 1: + raise ValueError(f"length must be >= 1, got {self.length}.") + @classmethod def _check_native_dtype( cls: type[Self], dtype: TBaseDType diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 4a1114617a..3fb26cf366 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -63,6 +63,14 @@ class FixedLengthUTF32( _zarr_v3_name: ClassVar[Literal["fixed_length_utf32"]] = "fixed_length_utf32" code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point + def __post_init__(self) -> None: + """ + We don't allow instances of this class with length less than 1 because there is no way such + a data type can contain actual data. + """ + if self.length < 1: + raise ValueError(f"length must be >= 1, got {self.length}.") + @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: if cls._check_native_dtype(dtype): @@ -195,7 +203,7 @@ class UTF8Base(ZDType[TDType_co, str], HasObjectCodec): as data type, but as a base class for other variable length string data types. """ - _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" + _zarr_v3_name: ClassVar[Literal["string"]] = "string" object_codec_id: ClassVar[Literal["vlen-utf8"]] = "vlen-utf8" @classmethod @@ -222,7 +230,7 @@ def _check_json_v2( ) @classmethod - def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["variable_length_utf8"]]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["string"]]: return data == cls._zarr_v3_name @classmethod @@ -246,15 +254,14 @@ def to_json( self, zarr_format: Literal[2] ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]]: ... @overload - def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... + def to_json(self, zarr_format: Literal[3]) -> Literal["string"]: ... 
def to_json( self, zarr_format: ZarrFormat - ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]] | Literal["variable_length_utf8"]: + ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]] | Literal["string"]: if zarr_format == 2: return {"name": "|O", "object_codec_id": self.object_codec_id} elif zarr_format == 3: - v3_unstable_dtype_warning(self) return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index bfdbf7bd95..8f40132820 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -37,6 +37,10 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): _zarr_v3_name = "structured" fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] + def __post_init__(self) -> None: + if len(self.fields) < 1: + raise ValueError(f"must have at least one field. Got {self.fields!r}") + @classmethod def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ diff --git a/tests/test_array.py b/tests/test_array.py index 28ea812967..bc27a30593 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -41,7 +41,7 @@ from zarr.core.chunk_grids import _auto_partition from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams from zarr.core.common import JSON, MemoryOrder, ZarrFormat -from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype import parse_data_type from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.float import Float32, Float64 @@ -1285,7 +1285,7 @@ async def test_v2_chunk_encoding( filters=filters, ) filters_expected, compressor_expected = _parse_chunk_encoding_v2( - filters=filters, compressor=compressors, dtype=get_data_type_from_native_dtype(dtype) + 
filters=filters, compressor=compressors, dtype=parse_data_type(dtype, zarr_format=2) ) assert arr.metadata.zarr_format == 2 # guard for mypy assert arr.metadata.compressor == compressor_expected diff --git a/tests/test_dtype/test_npy/test_bytes.py b/tests/test_dtype/test_npy/test_bytes.py index b7c16f573e..3f1ba9315e 100644 --- a/tests/test_dtype/test_npy/test_bytes.py +++ b/tests/test_dtype/test_npy/test_bytes.py @@ -15,7 +15,7 @@ class TestNullTerminatedBytes(BaseTestZDType): np.dtype("|U10"), ) valid_json_v2 = ( - {"name": "|S0", "object_codec_id": None}, + {"name": "|S1", "object_codec_id": None}, {"name": "|S2", "object_codec_id": None}, {"name": "|S4", "object_codec_id": None}, ) @@ -31,22 +31,22 @@ class TestNullTerminatedBytes(BaseTestZDType): ) scalar_v2_params = ( - (NullTerminatedBytes(length=0), ""), + (NullTerminatedBytes(length=1), "MA=="), (NullTerminatedBytes(length=2), "YWI="), (NullTerminatedBytes(length=4), "YWJjZA=="), ) scalar_v3_params = ( - (NullTerminatedBytes(length=0), ""), + (NullTerminatedBytes(length=1), "MA=="), (NullTerminatedBytes(length=2), "YWI="), (NullTerminatedBytes(length=4), "YWJjZA=="), ) cast_value_params = ( - (NullTerminatedBytes(length=0), "", np.bytes_("")), + (NullTerminatedBytes(length=1), "", np.bytes_("")), (NullTerminatedBytes(length=2), "ab", np.bytes_("ab")), (NullTerminatedBytes(length=4), "abcdefg", np.bytes_("abcd")), ) item_size_params = ( - NullTerminatedBytes(length=0), + NullTerminatedBytes(length=1), NullTerminatedBytes(length=4), NullTerminatedBytes(length=10), ) @@ -62,7 +62,7 @@ class TestRawBytes(BaseTestZDType): ) valid_json_v2 = ({"name": "|V10", "object_codec_id": None},) valid_json_v3 = ( - {"name": "raw_bytes", "configuration": {"length_bytes": 0}}, + {"name": "raw_bytes", "configuration": {"length_bytes": 1}}, {"name": "raw_bytes", "configuration": {"length_bytes": 8}}, ) @@ -77,22 +77,22 @@ class TestRawBytes(BaseTestZDType): ) scalar_v2_params = ( - (RawBytes(length=0), ""), + 
(RawBytes(length=1), "AA=="), (RawBytes(length=2), "YWI="), (RawBytes(length=4), "YWJjZA=="), ) scalar_v3_params = ( - (RawBytes(length=0), ""), + (RawBytes(length=1), "AA=="), (RawBytes(length=2), "YWI="), (RawBytes(length=4), "YWJjZA=="), ) cast_value_params = ( - (RawBytes(length=0), b"", np.void(b"")), + (RawBytes(length=1), b"\x00", np.void(b"\x00")), (RawBytes(length=2), b"ab", np.void(b"ab")), (RawBytes(length=4), b"abcd", np.void(b"abcd")), ) item_size_params = ( - RawBytes(length=0), + RawBytes(length=1), RawBytes(length=4), RawBytes(length=10), ) @@ -152,3 +152,14 @@ def test_unstable_dtype_warning( """ with pytest.raises(UnstableSpecificationWarning): zdtype.to_json(zarr_format=3) + + +@pytest.mark.parametrize("zdtype_cls", [NullTerminatedBytes, RawBytes]) +def test_invalid_size(zdtype_cls: type[NullTerminatedBytes] | type[RawBytes]) -> None: + """ + Test that it's impossible to create a data type that has no length + """ + length = 0 + msg = f"length must be >= 1, got {length}." 
+ with pytest.raises(ValueError, match=msg): + zdtype_cls(length=length) diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index 51b1c3df3a..7c3c6a8cd4 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -19,7 +19,7 @@ class TestVariableLengthString(BaseTestZDType): np.dtype("|S10"), ) valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-utf8"},) - valid_json_v3 = ("variable_length_utf8",) + valid_json_v3 = ("string",) invalid_json_v2 = ( "|S10", "|f8", @@ -53,7 +53,7 @@ class TestVariableLengthString(BaseTestZDType): # type: ignore[no-redef] np.dtype("|S10"), ) valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-utf8"},) - valid_json_v3 = ("variable_length_utf8",) + valid_json_v3 = ("string",) invalid_json_v2 = ( "|S10", "|f8", @@ -101,26 +101,31 @@ class TestFixedLengthUTF32(BaseTestZDType): {"name": "numpy.fixed_length_utf32", "configuration": {"length_bits": "invalid"}}, ) - scalar_v2_params = ((FixedLengthUTF32(length=0), ""), (FixedLengthUTF32(length=2), "hi")) + scalar_v2_params = ((FixedLengthUTF32(length=1), ""), (FixedLengthUTF32(length=2), "hi")) scalar_v3_params = ( - (FixedLengthUTF32(length=0), ""), + (FixedLengthUTF32(length=1), ""), (FixedLengthUTF32(length=2), "hi"), (FixedLengthUTF32(length=4), "hihi"), ) cast_value_params = ( - (FixedLengthUTF32(length=0), "", np.str_("")), + (FixedLengthUTF32(length=1), "", np.str_("")), (FixedLengthUTF32(length=2), "hi", np.str_("hi")), (FixedLengthUTF32(length=4), "hihi", np.str_("hihi")), ) item_size_params = ( - FixedLengthUTF32(length=0), + FixedLengthUTF32(length=1), FixedLengthUTF32(length=4), FixedLengthUTF32(length=10), ) -@pytest.mark.parametrize("zdtype", [FixedLengthUTF32(length=10), VariableLengthUTF8()]) +@pytest.mark.parametrize( + "zdtype", + [ + FixedLengthUTF32(length=10), + ], +) def test_unstable_dtype_warning(zdtype: FixedLengthUTF32 | VariableLengthUTF8) -> None: """ Test that we get 
a warning when serializing a dtype without a zarr v3 spec to json @@ -128,3 +133,13 @@ def test_unstable_dtype_warning(zdtype: FixedLengthUTF32 | VariableLengthUTF8) - """ with pytest.raises(UnstableSpecificationWarning): zdtype.to_json(zarr_format=3) + + +def test_invalid_size() -> None: + """ + Test that it's impossible to create a data type that has no length + """ + length = 0 + msg = f"length must be >= 1, got {length}." + with pytest.raises(ValueError, match=msg): + FixedLengthUTF32(length=length) diff --git a/tests/test_dtype/test_npy/test_structured.py b/tests/test_dtype/test_npy/test_structured.py index e9c9ab11d0..c51aa73ff3 100644 --- a/tests/test_dtype/test_npy/test_structured.py +++ b/tests/test_dtype/test_npy/test_structured.py @@ -3,6 +3,7 @@ from typing import Any import numpy as np +import pytest from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype import ( @@ -106,3 +107,13 @@ def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: Structured(fields=(("field1", Int32()), ("field2", Float64()))), Structured(fields=(("field1", Int64()), ("field2", Int32()))), ) + + +def test_invalid_size() -> None: + """ + Test that it's impossible to create a data type that has no fields + """ + fields = () + msg = f"must have at least one field. 
Got {fields!r}" + with pytest.raises(ValueError, match=msg): + Structured(fields=fields) diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index c7d5f90065..d4e37440a7 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -21,6 +21,7 @@ Int16, TBaseDType, TBaseScalar, + VariableLengthUTF8, ZDType, data_type_registry, get_data_type_from_json, @@ -176,6 +177,8 @@ def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: @pytest.mark.parametrize( ("dtype_params", "expected", "zarr_format"), [ + ("str", VariableLengthUTF8(), 2), + ("str", VariableLengthUTF8(), 3), ("int8", Int8(), 3), (Int8(), Int8(), 3), (">i2", Int16(endianness="big"), 2), diff --git a/tests/test_regression/scripts/v3.0.8.py b/tests/test_regression/scripts/v3.0.8.py new file mode 100644 index 0000000000..f93f43fd57 --- /dev/null +++ b/tests/test_regression/scripts/v3.0.8.py @@ -0,0 +1,68 @@ +# /// script +# requires-python = "==3.12" +# dependencies = [ +# "zarr==3.0.8" +# ] +# /// + + +import argparse + +import zarr +from zarr.abc.store import Store + +def copy_group( + *, node: zarr.Group, store: Store, path: str, overwrite: bool +) -> zarr.Group: + result = zarr.create_group( + store=store, + path=path, + overwrite=overwrite, + attributes=node.attrs.asdict(), + zarr_format=node.metadata.zarr_format) + for key, child in node.members(): + child_path = f"{path}/{key}" + if isinstance(child, zarr.Group): + copy_group(node=child, store=store, path=child_path, overwrite=overwrite) + else: + copy_array(node=child, store=store, overwrite=overwrite, path=child_path) + return result + + +def copy_array( + *, node: zarr.Array, store: Store, path: str, overwrite: bool +) -> zarr.Array: + result = zarr.from_array(store, name=path, data=node, write_data=True) + return result + + +def copy_node( + node: zarr.Group | zarr.Array, store: Store, path: str, overwrite: bool +) -> zarr.Group | zarr.Array: + if isinstance(node, zarr.Group): + return 
copy_group(node=node, store=store, path=path, overwrite=overwrite) + else: + return copy_array(node=node, store=store, path=path, overwrite=overwrite) + + +def cli() -> None: + parser = argparse.ArgumentParser( + description="Copy a zarr hierarchy from one location to another" + ) + parser.add_argument("source", type=str, help="Path to the source zarr hierarchy") + parser.add_argument("destination", type=str, help="Path to the destination zarr hierarchy") + args = parser.parse_args() + + src, dst = args.source, args.destination + root_src = zarr.open(src, mode="r") + result = copy_node(node=root_src, store=dst, path="", overwrite=True) + + print(f"successfully created {result} at {dst}") + + +def main() -> None: + cli() + + +if __name__ == "__main__": + main() diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_v2_dtype_regression.py similarity index 52% rename from tests/test_regression/test_regression.py rename to tests/test_regression/test_v2_dtype_regression.py index 34c48a6933..9702ca7d23 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_v2_dtype_regression.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from itertools import product from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import numcodecs import numpy as np @@ -10,6 +10,9 @@ from numcodecs import LZ4, LZMA, Blosc, GZip, VLenBytes, VLenUTF8, Zstd import zarr +import zarr.abc +import zarr.abc.codec +import zarr.codecs as zarrcodecs from zarr.core.array import Array from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.dtype.npy.bytes import VariableLengthBytes @@ -19,6 +22,8 @@ if TYPE_CHECKING: from zarr.core.dtype import ZDTypeLike +ZarrPythonVersion = Literal["2.18", "3.0.8"] + def runner_installed() -> bool: """ @@ -36,6 +41,7 @@ class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] fill_value: np.generic | str | int | bytes filters: 
tuple[numcodecs.abc.Codec, ...] = () + serializer: str | None = None compressor: numcodecs.abc.Codec @@ -75,7 +81,7 @@ class ArrayParams: ArrayParams( values=np.array(["a", "bb", "ccc", "dddd"], dtype="O"), fill_value="1", - filters=(VLenUTF8(),), + serializer="vlen-utf8", compressor=GZip(), ) ] @@ -83,11 +89,11 @@ class ArrayParams: ArrayParams( values=np.array([b"a", b"bb", b"ccc", b"dddd"], dtype="O"), fill_value=b"1", - filters=(VLenBytes(),), + serializer="vlen-bytes", compressor=GZip(), ) ] -array_cases = ( +array_cases_v2_18 = ( basic_array_cases + bytes_array_cases + datetime_array_cases @@ -96,28 +102,37 @@ class ArrayParams: + vlen_bytes_cases ) +array_cases_v3_08 = vlen_string_cases + @pytest.fixture -def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: +def source_array_v2(tmp_path: Path, request: pytest.FixtureRequest) -> Array: + """ + Writes a zarr array to a temporary directory based on the provided ArrayParams. The array is + returned. + """ dest = tmp_path / "in" store = LocalStore(dest) array_params: ArrayParams = request.param compressor = array_params.compressor chunk_key_encoding = V2ChunkKeyEncoding(separator="/") dtype: ZDTypeLike - if array_params.values.dtype == np.dtype("|O") and array_params.filters == (VLenUTF8(),): + if array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-utf8": dtype = VariableLengthUTF8() # type: ignore[assignment] - elif array_params.values.dtype == np.dtype("|O") and array_params.filters == (VLenBytes(),): + filters = array_params.filters + (VLenUTF8(),) + elif array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-bytes": dtype = VariableLengthBytes() + filters = array_params.filters + (VLenBytes(),) else: dtype = array_params.values.dtype + filters = array_params.filters z = zarr.create_array( store, shape=array_params.values.shape, dtype=dtype, chunks=array_params.values.shape, compressors=compressor, - filters=array_params.filters, + 
filters=filters, fill_value=array_params.fill_value, order="C", chunk_key_encoding=chunk_key_encoding, @@ -128,23 +143,69 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: return z +@pytest.fixture +def source_array_v3(tmp_path: Path, request: pytest.FixtureRequest) -> Array: + """ + Writes a zarr array to a temporary directory based on the provided ArrayParams. The array is + returned. + """ + dest = tmp_path / "in" + store = LocalStore(dest) + array_params: ArrayParams = request.param + chunk_key_encoding = V2ChunkKeyEncoding(separator="/") + dtype: ZDTypeLike + serializer: Literal["auto"] | zarr.abc.codec.Codec + if array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-utf8": + dtype = VariableLengthUTF8() # type: ignore[assignment] + serializer = zarrcodecs.VLenUTF8Codec() + elif array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-bytes": + dtype = VariableLengthBytes() + serializer = zarrcodecs.VLenBytesCodec() + else: + dtype = array_params.values.dtype + serializer = "auto" + if array_params.compressor == GZip(): + compressor = zarrcodecs.GzipCodec() + else: + msg = ( + "This test is only compatible with gzip compression at the moment, because the author" + "did not want to implement a complete abstraction layer for v2 and v3 codecs in this test." 
+ ) + raise ValueError(msg) + z = zarr.create_array( + store, + shape=array_params.values.shape, + dtype=dtype, + chunks=array_params.values.shape, + compressors=compressor, + filters=array_params.filters, + serializer=serializer, + fill_value=array_params.fill_value, + chunk_key_encoding=chunk_key_encoding, + write_data=True, + zarr_format=3, + ) + z[:] = array_params.values + return z + + # TODO: make this dynamic based on the installed scripts script_paths = [Path(__file__).resolve().parent / "scripts" / "v2.18.py"] @pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") @pytest.mark.parametrize( - "source_array", array_cases, indirect=True, ids=tuple(map(str, array_cases)) + "source_array_v2", array_cases_v2_18, indirect=True, ids=tuple(map(str, array_cases_v2_18)) ) @pytest.mark.parametrize("script_path", script_paths) -def test_roundtrip(source_array: Array, tmp_path: Path, script_path: Path) -> None: +def test_roundtrip_v2(source_array_v2: Array, tmp_path: Path, script_path: Path) -> None: out_path = tmp_path / "out" copy_op = subprocess.run( [ "uv", "run", - script_path, - str(source_array.store).removeprefix("file://"), + str(script_path), + str(source_array_v2.store).removeprefix("file://"), str(out_path), ], capture_output=True, @@ -152,5 +213,30 @@ def test_roundtrip(source_array: Array, tmp_path: Path, script_path: Path) -> No ) assert copy_op.returncode == 0 out_array = zarr.open_array(store=out_path, mode="r", zarr_format=2) - assert source_array.metadata.to_dict() == out_array.metadata.to_dict() - assert np.array_equal(source_array[:], out_array[:]) + assert source_array_v2.metadata.to_dict() == out_array.metadata.to_dict() + assert np.array_equal(source_array_v2[:], out_array[:]) + + +@pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") +@pytest.mark.parametrize( + "source_array_v3", 
array_cases_v3_08, indirect=True, ids=tuple(map(str, array_cases_v3_08)) +) +def test_roundtrip_v3(source_array_v3: Array, tmp_path: Path) -> None: + script_path = Path(__file__).resolve().parent / "scripts" / "v3.0.8.py" + out_path = tmp_path / "out" + copy_op = subprocess.run( + [ + "uv", + "run", + str(script_path), + str(source_array_v3.store).removeprefix("file://"), + str(out_path), + ], + capture_output=True, + text=True, + ) + assert copy_op.returncode == 0 + out_array = zarr.open_array(store=out_path, mode="r", zarr_format=3) + assert source_array_v3.metadata.to_dict() == out_array.metadata.to_dict() + assert np.array_equal(source_array_v3[:], out_array[:])