
Commit 6970039

normanrz and Tobias314 authored
Configurable Zarr3 codecs (#1343)
* adds MagView.rechunk method
* make zarr3 codecs configurable
* types
* tests
* fixes
* fixes
* types
* types
* changelog
* test
* changelog
* add zarr3_codecs to more methods
* types
* changed signature to Zarr3Config
* pr feedback
* types
* changelog + rechunk
* docstring
* Update webknossos/webknossos/dataset/mag_view.py (Co-authored-by: Tobias <[email protected]>)
* compress defaults to True

---------

Co-authored-by: Tobias <[email protected]>
1 parent ad1d3e6 commit 6970039

File tree

5 files changed: +354, -105 lines

webknossos/Changelog.md

Lines changed: 2 additions & 0 deletions
@@ -16,9 +16,11 @@ For upgrade instructions, please check the respective _Breaking Changes_ section
 
 ### Added
 - Added `MagView.rechunk` methods to allow for rechunking of datasets. [#1342](https://github.com/scalableminds/webknossos-libs/pull/1342)
+- Added the option to configure the codecs of Zarr3 datasets. Supply a `Zarr3Config` to the `compress` argument in `Layer.add_mag` or similar methods. `codecs` and `chunk_key_encoding` can be customized. [#1343](https://github.com/scalableminds/webknossos-libs/pull/1343)
 
 ### Changed
 - Enforces that `chunk_shape` and `shard_shape` have power-of-two values. This assumption was used in the code previously, but not explicitly enforced. [#1342](https://github.com/scalableminds/webknossos-libs/pull/1342)
+- The `sharding_indexed` codec is no longer used if `chunk_shape == shard_shape`. [#1343](https://github.com/scalableminds/webknossos-libs/pull/1343)
 
 ### Fixed
 
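The changelog entry above describes the new `compress=Zarr3Config(...)` option. As a rough illustration, here is a minimal usage sketch assembled from the test added in this commit (see `test_zarr3_config` below); the import path `webknossos.dataset._array`, the `add_layer`/`add_mag` arguments, and the dataset path are taken from or modeled on that test and are not guaranteed to match the public API exactly:

from webknossos import Dataset
from webknossos.dataset._array import Zarr3Config
from webknossos.dataset.data_format import DataFormat

# Placeholder path and voxel size, mirroring the test setup.
ds = Dataset("testoutput/zarr3_example", voxel_size=(2, 2, 1))
layer = ds.add_layer("color", "color", data_format=DataFormat.Zarr3)

# Passing a Zarr3Config to `compress` selects the codec pipeline and the
# chunk key encoding of the Zarr3 array instead of the library defaults.
mag = layer.add_mag(
    1,
    compress=Zarr3Config(
        codecs=(
            {"name": "bytes"},
            {"name": "gzip", "configuration": {"level": 3}},
        ),
        chunk_key_encoding={"name": "default", "configuration": {"separator": "."}},
    ),
)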

webknossos/tests/dataset/test_dataset.py

Lines changed: 101 additions & 0 deletions
@@ -27,6 +27,7 @@
     RemoteDataset,
     View,
 )
+from webknossos.dataset._array import Zarr3ArrayInfo, Zarr3Config
 from webknossos.dataset.data_format import AttachmentDataFormat, DataFormat
 from webknossos.dataset.dataset import PROPERTIES_FILE_NAME
 from webknossos.dataset.defaults import DEFAULT_DATA_FORMAT
@@ -2484,6 +2485,9 @@ def test_invalid_chunk_shard_shape(output_path: UPath) -> None:
     with pytest.raises(ValueError, match=".*must be a multiple.*"):
         layer.add_mag("1", chunk_shape=(16, 16, 16), shard_shape=(8, 16, 16))
 
+    with pytest.raises(ValueError, match=".*must be a multiple.*"):
+        layer.add_mag("1", chunk_shape=(16, 16, 16), shard_shape=(8, 8, 8))
+
     with pytest.raises(ValueError, match=".*must be a multiple.*"):
         # also not a power-of-two shard shape
         layer.add_mag("1", chunk_shape=(16, 16, 16), shard_shape=(53, 16, 16))
@@ -2683,6 +2687,103 @@ def test_rechunking(data_format: DataFormat, output_path: UPath) -> None:
     assure_exported_properties(mag1.layer.dataset)
 
 
+@pytest.mark.parametrize("output_path", OUTPUT_PATHS)
+def test_zarr3_config(output_path: UPath) -> None:
+    new_dataset_path = prepare_dataset_path(DataFormat.Zarr3, output_path)
+    ds = Dataset(new_dataset_path, voxel_size=(2, 2, 1))
+    mag1 = ds.add_layer(
+        "color", COLOR_CATEGORY, num_channels=3, data_format=DataFormat.Zarr3
+    ).add_mag(
+        1,
+        compress=Zarr3Config(
+            codecs=(
+                {"name": "bytes"},
+                {"name": "gzip", "configuration": {"level": 3}},
+            ),
+            chunk_key_encoding={
+                "name": "default",
+                "configuration": {"separator": "."},
+            },
+        ),
+    )
+
+    # writing unaligned data to an uncompressed dataset
+    write_data = (np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8)
+    mag1.write(write_data, absolute_offset=(60, 80, 100), allow_resize=True)
+
+    assert isinstance(mag1.info, Zarr3ArrayInfo)
+    assert mag1.info.codecs == (
+        {"name": "bytes"},
+        {"name": "gzip", "configuration": {"level": 3}},
+    )
+    assert mag1.info.chunk_key_encoding == {
+        "name": "default",
+        "configuration": {"separator": "."},
+    }
+    assert (mag1.path / "c.0.0.0.0").exists()
+    assert json.loads((mag1.path / "zarr.json").read_bytes())["codecs"][0][
+        "configuration"
+    ]["codecs"] == [
+        {"name": "bytes"},
+        {"name": "gzip", "configuration": {"level": 3}},
+    ]
+
+    np.testing.assert_array_equal(
+        write_data, mag1.read(absolute_offset=(60, 80, 100), size=(10, 20, 30))
+    )
+
+    assure_exported_properties(mag1.layer.dataset)
+
+
+@pytest.mark.parametrize("output_path", OUTPUT_PATHS)
+def test_zarr3_sharding(output_path: UPath) -> None:
+    new_dataset_path = prepare_dataset_path(DataFormat.Zarr3, output_path)
+    ds = Dataset(new_dataset_path, voxel_size=(2, 2, 1))
+    mag1 = ds.add_layer(
+        "color", COLOR_CATEGORY, num_channels=3, data_format=DataFormat.Zarr3
+    ).add_mag(1, chunk_shape=(32, 32, 32), shard_shape=(64, 64, 64))
+
+    # writing unaligned data to an uncompressed dataset
+    write_data = (np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8)
+    mag1.write(write_data, absolute_offset=(60, 80, 100), allow_resize=True)
+
+    assert (
+        json.loads((mag1.path / "zarr.json").read_bytes())["codecs"][0]["name"]
+        == "sharding_indexed"
+    )
+
+    np.testing.assert_array_equal(
+        write_data, mag1.read(absolute_offset=(60, 80, 100), size=(10, 20, 30))
+    )
+
+    assure_exported_properties(mag1.layer.dataset)
+
+
+@pytest.mark.parametrize("output_path", OUTPUT_PATHS)
+def test_zarr3_no_sharding(output_path: UPath) -> None:
+    new_dataset_path = prepare_dataset_path(DataFormat.Zarr3, output_path)
+    ds = Dataset(new_dataset_path, voxel_size=(2, 2, 1))
+    mag1 = ds.add_layer(
+        "color", COLOR_CATEGORY, num_channels=3, data_format=DataFormat.Zarr3
+    ).add_mag(1, chunk_shape=(32, 32, 32), shard_shape=(32, 32, 32))
+
+    # writing unaligned data to an uncompressed dataset
+    write_data = (np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8)
+    mag1.write(write_data, absolute_offset=(60, 80, 100), allow_resize=True)
+
+    # Don't set up a sharding codec, if no sharding is necessary, i.e. chunk_shape == shard_shape
+    assert (
+        json.loads((mag1.path / "zarr.json").read_bytes())["codecs"][0]["name"]
+        != "sharding_indexed"
+    )
+
+    np.testing.assert_array_equal(
+        write_data, mag1.read(absolute_offset=(60, 80, 100), size=(10, 20, 30))
+    )
+
+    assure_exported_properties(mag1.layer.dataset)
+
+
 def test_dataset_view_configuration() -> None:
     ds_path = prepare_dataset_path(DataFormat.WKW, TESTOUTPUT_DIR)
     ds1 = Dataset(ds_path, voxel_size=(2, 2, 1))
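The `test_zarr3_sharding`/`test_zarr3_no_sharding` pair above pins down the second changelog entry: the `sharding_indexed` codec is only written when `shard_shape` differs from `chunk_shape`. Below is a small sketch of how that can be checked on disk, assuming `layer` is a Zarr3 layer like the one in the sketch after the changelog diff; the `zarr.json` layout and `MagView.path` are taken from the tests, while the mag numbers and shapes are made up for illustration:

import json

# chunk_shape != shard_shape: shards hold several chunks, so the top-level
# codec in zarr.json is expected to be "sharding_indexed" (see test_zarr3_sharding).
sharded = layer.add_mag(2, chunk_shape=(32, 32, 32), shard_shape=(64, 64, 64))

# chunk_shape == shard_shape: no sharding codec is written (see test_zarr3_no_sharding).
unsharded = layer.add_mag(4, chunk_shape=(32, 32, 32), shard_shape=(32, 32, 32))

for mag in (sharded, unsharded):
    top_codec = json.loads((mag.path / "zarr.json").read_bytes())["codecs"][0]["name"]
    print(mag.path.name, top_codec)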
