diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index 6828377f97..14d2877274 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -17,6 +17,7 @@ from zarr.core.common import ChunkCoords, concurrent_map from zarr.core.config import config from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice +from zarr.core.metadata.v2 import default_fill_value from zarr.registry import register_pipeline if TYPE_CHECKING: @@ -247,7 +248,17 @@ async def read_batch( if chunk_array is not None: out[out_selection] = chunk_array else: - out[out_selection] = chunk_spec.fill_value + fill_value = chunk_spec.fill_value + + if fill_value is None: + # Zarr V2 allowed `fill_value` to be null in the metadata. + # Zarr V3 requires it to be set. This has already been + # validated when decoding the metadata, but we support reading + # Zarr V2 data and need to support the case where fill_value + # is None. + fill_value = default_fill_value(dtype=chunk_spec.dtype) + + out[out_selection] = fill_value else: chunk_bytes_batch = await concurrent_map( [ @@ -274,7 +285,10 @@ async def read_batch( tmp = tmp.squeeze(axis=drop_axes) out[out_selection] = tmp else: - out[out_selection] = chunk_spec.fill_value + fill_value = chunk_spec.fill_value + if fill_value is None: + fill_value = default_fill_value(dtype=chunk_spec.dtype) + out[out_selection] = fill_value def _merge_chunk_array( self, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 1b525fa99d..86d7214e96 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -382,7 +382,7 @@ async def _create_v2( chunk_grid=chunks, order=order, dimension_separator=dimension_separator, - fill_value=0 if fill_value is None else fill_value, + fill_value=fill_value, compressor=compressor, filters=filters, attributes=attributes, diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 8bb6defb69..66a4f0aba6 100644 --- a/src/zarr/core/metadata/v2.py +++ 
def default_fill_value(dtype: np.dtype[Any]) -> Any:
    """
    Get the default (zero-like) fill value for a NumPy data type.

    Parameters
    ----------
    dtype : np.dtype
        The data type of the array whose fill value is unspecified.

    Returns
    -------
    Any
        A NumPy scalar of type ``dtype`` holding that type's zero value:
        ``0`` for numeric types, ``False`` for booleans, the empty string
        for fixed-width string types, the epoch for ``datetime64`` and an
        all-zero record for structured types.

    Notes
    -----
    This differs from :func:`parse_fill_value`, which parses a fill value
    stored in the Array metadata into an in-memory value. This only gives
    the default fill value for some type.

    This is useful for reading Zarr V2 arrays, which allow the fill
    value to be unspecified.
    """
    # Index a zero-initialised 0-d array instead of calling ``dtype.type(0)``.
    # The scalar constructor only does the right thing for plain numeric
    # types: it raises for ``datetime64`` (an integer needs a unit), yields
    # the string ``"0"`` for unicode dtypes, and cannot build a structured
    # record. ``np.zeros`` handles every dtype uniformly.
    return np.zeros((), dtype=dtype)[()]
def test_implicit_fill_value(store: StorePath) -> None:
    """
    A Zarr V2 array opened with ``fill_value=None`` keeps ``None`` in its
    metadata (in memory and when serialised to a dict) and reads back as
    zeros of the array's dtype.
    """
    array = zarr.open_array(store=store, shape=(4,), fill_value=None, zarr_format=2)

    # The unspecified fill value must survive untouched in the metadata.
    metadata = array.metadata
    assert metadata.fill_value is None
    assert metadata.to_dict()["fill_value"] is None

    # Reading chunks that were never written falls back to zeros.
    np.testing.assert_array_equal(array[:], np.zeros(array.shape, dtype=array.dtype))