From 6096a514ea729844355d1723f4e6160ee981e8cc Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 12 Sep 2024 22:15:54 +0200
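Subject: [PATCH 1/4] fix: narrow JSON type, ensure compliance with it, and fix
 a variety of v2 metadata issues

* JSON no longer admits Enum or list values; dict becomes Mapping.
* Codec to_dict() methods emit enum .value members and tuples accordingly.
* Metadata.to_dict() is typed as dict[str, JSON], so the isinstance guards in
  the v2 metadata code are dropped.
* ArrayV2Metadata holds filters/compressor as numcodecs codecs; parse_filters
  and parse_compressor also accept their dict configurations.
* V2Filters applies the codec objects directly instead of calling
  numcodecs.get_codec on every encode/decode.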

---
 src/zarr/abc/metadata.py          |  2 +-
 src/zarr/codecs/_v2.py            | 11 +++---
 src/zarr/codecs/blosc.py          |  4 +-
 src/zarr/codecs/bytes.py          |  2 +-
 src/zarr/codecs/pipeline.py       |  9 ++++-
 src/zarr/codecs/sharding.py       | 12 +++---
 src/zarr/codecs/transpose.py      |  2 +-
 src/zarr/core/array.py            | 16 ++------
 src/zarr/core/common.py           |  6 +--
 src/zarr/core/metadata/v2.py      | 62 ++++++++++++++++++++-----------
 src/zarr/core/metadata/v3.py      | 41 ++++++++++----------
 tests/v3/test_metadata/test_v2.py |  8 ++--
 12 files changed, 95 insertions(+), 80 deletions(-)

diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py
index d9b11af883..7ea668c891 100644
--- a/src/zarr/abc/metadata.py
+++ b/src/zarr/abc/metadata.py
@@ -15,7 +15,7 @@
 
 @dataclass(frozen=True)
 class Metadata:
-    def to_dict(self) -> JSON:
+    def to_dict(self) -> dict[str, JSON]:
         """
         Recursively serialize this model to a dictionary.
         This method inspects the fields of self and calls `x.to_dict()` for any fields that
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index eb8ec435f5..c8bc558349 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -67,7 +67,7 @@ def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec)
 
 @dataclass(frozen=True)
 class V2Filters(ArrayArrayCodec):
-    filters: list[dict[str, JSON]]
+    filters: tuple[numcodecs.abc.Codec, ...] | None
 
     is_fixed_size = False
 
@@ -79,8 +79,7 @@ async def _decode_single(
         chunk_ndarray = chunk_array.as_ndarray_like()
         # apply filters in reverse order
         if self.filters is not None:
-            for filter_metadata in self.filters[::-1]:
-                filter = numcodecs.get_codec(filter_metadata)
+            for filter in self.filters[::-1]:
                 chunk_ndarray = await to_thread(filter.decode, chunk_ndarray)
 
         # ensure correct chunk shape
@@ -99,9 +98,9 @@ async def _encode_single(
     ) -> NDBuffer | None:
         chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order)
 
-        for filter_metadata in self.filters:
-            filter = numcodecs.get_codec(filter_metadata)
-            chunk_ndarray = await to_thread(filter.encode, chunk_ndarray)
+        if self.filters is not None:
+            for filter in self.filters:
+                chunk_ndarray = await to_thread(filter.encode, chunk_ndarray)
 
         return get_ndbuffer_class().from_ndarray_like(chunk_ndarray)
 
diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py
index f831dc960d..7b10d91a6a 100644
--- a/src/zarr/codecs/blosc.py
+++ b/src/zarr/codecs/blosc.py
@@ -127,9 +127,9 @@ def to_dict(self) -> dict[str, JSON]:
             "name": "blosc",
             "configuration": {
                 "typesize": self.typesize,
-                "cname": self.cname,
+                "cname": self.cname.value,
                 "clevel": self.clevel,
-                "shuffle": self.shuffle,
+                "shuffle": self.shuffle.value,
                 "blocksize": self.blocksize,
             },
         }
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
index bc3207be2e..7a683411e9 100644
--- a/src/zarr/codecs/bytes.py
+++ b/src/zarr/codecs/bytes.py
@@ -53,7 +53,7 @@ def to_dict(self) -> dict[str, JSON]:
         if self.endian is None:
             return {"name": "bytes"}
         else:
-            return {"name": "bytes", "configuration": {"endian": self.endian}}
+            return {"name": "bytes", "configuration": {"endian": self.endian.value}}
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         if array_spec.dtype.itemsize == 0:
diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py
index 3a400d6eaf..a1a4dbaab1 100644
--- a/src/zarr/codecs/pipeline.py
+++ b/src/zarr/codecs/pipeline.py
@@ -84,8 +84,13 @@ def from_dict(cls, data: Iterable[JSON | Codec], *, batch_size: int | None = Non
                 out.append(get_codec_class(name_parsed).from_dict(c))  # type: ignore[arg-type]
         return cls.from_list(out, batch_size=batch_size)
 
-    def to_dict(self) -> JSON:
-        return [c.to_dict() for c in self]
+    def to_dict(self) -> dict[str, JSON]:
+        return {
+            "array_array_codecs": tuple(c.to_dict() for c in self.array_array_codecs),
+            "array_bytes_codec": self.array_bytes_codec.to_dict(),
+            "bytes_bytes_codec": tuple(c.to_dict() for c in self.bytes_bytes_codecs),
+            "batch_size": self.batch_size,
+        }
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         return type(self).from_list([c.evolve_from_array_spec(array_spec=array_spec) for c in self])
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index df7f5978a7..6f9df65692 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -68,7 +68,7 @@ class ShardingCodecIndexLocation(Enum):
     end = "end"
 
 
-def parse_index_location(data: JSON) -> ShardingCodecIndexLocation:
+def parse_index_location(data: object) -> ShardingCodecIndexLocation:
     return parse_enum(data, ShardingCodecIndexLocation)
 
 
@@ -333,7 +333,7 @@ def __init__(
         chunk_shape: ChunkCoordsLike,
         codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),),
         index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()),
-        index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end,
+        index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end,
     ) -> None:
         chunk_shape_parsed = parse_shapelike(chunk_shape)
         codecs_parsed = parse_codecs(codecs)
@@ -379,10 +379,10 @@ def to_dict(self) -> dict[str, JSON]:
         return {
             "name": "sharding_indexed",
             "configuration": {
-                "chunk_shape": list(self.chunk_shape),
-                "codecs": [s.to_dict() for s in self.codecs],
-                "index_codecs": [s.to_dict() for s in self.index_codecs],
-                "index_location": self.index_location,
+                "chunk_shape": self.chunk_shape,
+                "codecs": tuple([s.to_dict() for s in self.codecs]),
+                "index_codecs": tuple([s.to_dict() for s in self.index_codecs]),
+                "index_location": self.index_location.value,
             },
         }
 
diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py
index 9bb795a3a1..45eb5bbe5f 100644
--- a/src/zarr/codecs/transpose.py
+++ b/src/zarr/codecs/transpose.py
@@ -45,7 +45,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
         return cls(**configuration_parsed)  # type: ignore[arg-type]
 
     def to_dict(self) -> dict[str, JSON]:
-        return {"name": "transpose", "configuration": {"order": list(self.order)}}
+        return {"name": "transpose", "configuration": {"order": tuple(self.order)}}
 
     def validate(self, shape: tuple[int, ...], dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None:
         if len(self.order) != len(shape):
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 7311b6eec2..b16daba0e2 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -90,7 +90,7 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP
         return get_pipeline_class().from_list(metadata.codecs)
     elif isinstance(metadata, ArrayV2Metadata):
         return get_pipeline_class().from_list(
-            [V2Filters(metadata.filters or []), V2Compressor(metadata.compressor)]
+            [V2Filters(metadata.filters or ()), V2Compressor(metadata.compressor)]
         )
     else:
         raise TypeError
@@ -299,8 +299,6 @@ async def _create_v2(
         attributes: dict[str, JSON] | None = None,
         exists_ok: bool = False,
     ) -> AsyncArray:
-        import numcodecs
-
         if not exists_ok:
             await ensure_no_existing_node(store_path, zarr_format=2)
         if order is None:
@@ -315,15 +313,9 @@ async def _create_v2(
             chunks=chunks,
             order=order,
             dimension_separator=dimension_separator,
-            fill_value=0 if fill_value is None else fill_value,
-            compressor=(
-                numcodecs.get_codec(compressor).get_config() if compressor is not None else None
-            ),
-            filters=(
-                [numcodecs.get_codec(filter).get_config() for filter in filters]
-                if filters is not None
-                else None
-            ),
+            fill_value=fill_value,
+            compressor=compressor,
+            filters=filters,
             attributes=attributes,
         )
         array = cls(metadata=metadata, store_path=store_path)
diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py
index 99ab58fae9..906467005f 100644
--- a/src/zarr/core/common.py
+++ b/src/zarr/core/common.py
@@ -4,7 +4,7 @@
 import contextvars
 import functools
 import operator
-from collections.abc import Iterable
+from collections.abc import Iterable, Mapping
 from enum import Enum
 from typing import (
     TYPE_CHECKING,
@@ -32,7 +32,7 @@
 ChunkCoords = tuple[int, ...]
 ChunkCoordsLike = Iterable[int]
 ZarrFormat = Literal[2, 3]
-JSON = None | str | int | float | Enum | dict[str, "JSON"] | list["JSON"] | tuple["JSON", ...]
+JSON = None | str | int | float | Mapping[str, "JSON"] | tuple["JSON", ...]
 MemoryOrder = Literal["C", "F"]
 AccessModeLiteral = Literal["r", "r+", "a", "w", "w-"]
 
@@ -80,7 +80,7 @@ def enum_names(enum: type[E]) -> Iterator[str]:
         yield item.name
 
 
-def parse_enum(data: JSON, cls: type[E]) -> E:
+def parse_enum(data: object, cls: type[E]) -> E:
     if isinstance(data, cls):
         return data
     if not isinstance(data, str):
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 6d5ecd7e86..af7821bea7 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections.abc import Iterable
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -14,6 +15,7 @@
 import json
 from dataclasses import dataclass, field, replace
 
+import numcodecs
 import numpy as np
 
 from zarr.core.array_spec import ArraySpec
@@ -31,9 +33,9 @@ class ArrayV2Metadata(ArrayMetadata):
     data_type: np.dtype[Any]
     fill_value: None | int | float = 0
     order: Literal["C", "F"] = "C"
-    filters: list[dict[str, JSON]] | None = None
+    filters: tuple[numcodecs.abc.Codec, ...] | None = None
     dimension_separator: Literal[".", "/"] = "."
-    compressor: dict[str, JSON] | None = None
+    compressor: numcodecs.abc.Codec | None = None
     attributes: dict[str, JSON] = field(default_factory=dict)
     zarr_format: Literal[2] = field(init=False, default=2)
 
@@ -46,8 +48,8 @@ def __init__(
         fill_value: Any,
         order: Literal["C", "F"],
         dimension_separator: Literal[".", "/"] = ".",
-        compressor: dict[str, JSON] | None = None,
-        filters: list[dict[str, JSON]] | None = None,
+        compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
+        filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
         attributes: dict[str, JSON] | None = None,
     ):
         """
@@ -104,11 +106,6 @@ def _json_convert(
             raise TypeError
 
         zarray_dict = self.to_dict()
-
-        # todo: remove this check when we can ensure that to_dict always returns dicts.
-        if not isinstance(zarray_dict, dict):
-            raise TypeError(f"Invalid type: got {type(zarray_dict)}, expected dict.")
-
         zattrs_dict = zarray_dict.pop("attributes", {})
         json_indent = config.get("json_indent")
         return {
@@ -128,13 +125,8 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
         _ = parse_zarr_format(_data.pop("zarr_format"))
         return cls(**_data)
 
-    def to_dict(self) -> JSON:
+    def to_dict(self) -> dict[str, JSON]:
         zarray_dict = super().to_dict()
-
-        # todo: remove this check when we can ensure that to_dict always returns dicts.
-        if not isinstance(zarray_dict, dict):
-            raise TypeError(f"Invalid type: got {type(zarray_dict)}, expected dict.")
-
         _ = zarray_dict.pop("chunk_grid")
         zarray_dict["chunks"] = self.chunk_grid.chunk_shape
 
@@ -165,18 +157,44 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:
         return replace(self, attributes=attributes)
 
 
-def parse_zarr_format(data: Literal[2]) -> Literal[2]:
+def parse_zarr_format(data: object) -> Literal[2]:
     if data == 2:
-        return data
+        return 2
     raise ValueError(f"Invalid value. Expected 2. Got {data}.")
 
 
-def parse_filters(data: list[dict[str, JSON]] | None) -> list[dict[str, JSON]] | None:
-    return data
+def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None:
+    """
+    Parse a potential tuple of filters
+    """
+    out: list[numcodecs.abc.Codec] = []
 
+    if data is None:
+        return data
+    if isinstance(data, Iterable):
+        for idx, val in enumerate(data):
+            if isinstance(val, numcodecs.abc.Codec):
+                out.append(val)
+            elif isinstance(val, dict):
+                out.append(numcodecs.get_codec(val))
+            else:
+                msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead."
+                raise TypeError(msg)
+        return tuple(out)
+    msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead."
+    raise TypeError(msg)
 
-def parse_compressor(data: dict[str, JSON] | None) -> dict[str, JSON] | None:
-    return data
+
+def parse_compressor(data: object) -> numcodecs.abc.Codec | None:
+    """
+    Parse a potential compressor (None, a Codec, or a dict config for numcodecs.get_codec).
+    """
+    if data is None or isinstance(data, numcodecs.abc.Codec):
+        return data
+    if isinstance(data, dict):
+        return numcodecs.get_codec(data)
+    msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
+    raise TypeError(msg)  # wrong argument *type*; matches what parse_filters raises
 
 
 def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
@@ -189,7 +207,7 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
     return data
 
 
-def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
+def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
     """
     Parse a potential fill value into a value that is compatible with the provided dtype.
 
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 195c3bd0a2..068a079f76 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -30,19 +30,19 @@
 from zarr.registry import get_codec_class, get_pipeline_class
 
 
-def parse_zarr_format(data: Literal[3]) -> Literal[3]:
+def parse_zarr_format(data: object) -> Literal[3]:
     if data == 3:
-        return data
+        return 3
     raise ValueError(f"Invalid value. Expected 3. Got {data}.")
 
 
-def parse_node_type_array(data: Literal["array"]) -> Literal["array"]:
+def parse_node_type_array(data: object) -> Literal["array"]:
     if data == "array":
-        return data
+        return "array"
     raise ValueError(f"Invalid value. Expected 'array'. Got {data}.")
 
 
-def parse_codecs(data: Iterable[Codec | dict[str, JSON]]) -> tuple[Codec, ...]:
+def parse_codecs(data: object) -> tuple[Codec, ...]:
     out: tuple[Codec, ...] = ()
 
     if not isinstance(data, Iterable):
@@ -60,10 +60,10 @@ def parse_codecs(data: Iterable[Codec | dict[str, JSON]]) -> tuple[Codec, ...]:
     return out
 
 
-def parse_dimension_names(data: None | Iterable[str | None]) -> tuple[str | None, ...] | None:
+def parse_dimension_names(data: object) -> tuple[str | None, ...] | None:
     if data is None:
         return data
-    elif all(isinstance(x, type(None) | str) for x in data):
+    elif isinstance(data, Iterable) and all(isinstance(x, type(None) | str) for x in data):
         return tuple(data)
     else:
         msg = f"Expected either None or a iterable of str, got {type(data)}"
@@ -169,7 +169,7 @@ def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str:
         return self.chunk_key_encoding.encode_chunk_key(chunk_coords)
 
     def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
-        def _json_convert(o: Any) -> Any:
+        def _json_convert(o: object) -> Any:
             if isinstance(o, np.dtype):
                 return str(o)
             if np.isscalar(o):
@@ -206,14 +206,14 @@ def _json_convert(o: Any) -> Any:
         }
 
     @classmethod
-    def from_dict(cls, data: dict[str, JSON]) -> ArrayV3Metadata:
+    def from_dict(cls, data: dict[str, JSON]) -> Self:
         # make a copy because we are modifying the dict
         _data = data.copy()
-        # TODO: Remove the type: ignores[] comments below and use a TypedDict to type `data`
+
         # check that the zarr_format attribute is correct
-        _ = parse_zarr_format(_data.pop("zarr_format"))  # type: ignore[arg-type]
+        _ = parse_zarr_format(_data.pop("zarr_format"))
         # check that the node_type attribute is correct
-        _ = parse_node_type_array(_data.pop("node_type"))  # type: ignore[arg-type]
+        _ = parse_node_type_array(_data.pop("node_type"))
 
         # dimension_names key is optional, normalize missing to `None`
         _data["dimension_names"] = _data.pop("dimension_names", None)
@@ -221,7 +221,7 @@ def from_dict(cls, data: dict[str, JSON]) -> ArrayV3Metadata:
         _data["attributes"] = _data.pop("attributes", None)
         return cls(**_data)  # type: ignore[arg-type]
 
-    def to_dict(self) -> dict[str, Any]:
+    def to_dict(self) -> dict[str, JSON]:
         out_dict = super().to_dict()
 
         if not isinstance(out_dict, dict):
@@ -266,23 +266,23 @@ def create_pipeline(data: Iterable[Codec | JSON]) -> CodecPipeline:
 
 
 @overload
-def parse_fill_value(fill_value: Any, dtype: BOOL_DTYPE) -> BOOL: ...
+def parse_fill_value(fill_value: object, dtype: BOOL_DTYPE) -> BOOL: ...
 
 
 @overload
-def parse_fill_value(fill_value: Any, dtype: INTEGER_DTYPE) -> INTEGER: ...
+def parse_fill_value(fill_value: object, dtype: INTEGER_DTYPE) -> INTEGER: ...
 
 
 @overload
-def parse_fill_value(fill_value: Any, dtype: FLOAT_DTYPE) -> FLOAT: ...
+def parse_fill_value(fill_value: object, dtype: FLOAT_DTYPE) -> FLOAT: ...
 
 
 @overload
-def parse_fill_value(fill_value: Any, dtype: COMPLEX_DTYPE) -> COMPLEX: ...
+def parse_fill_value(fill_value: object, dtype: COMPLEX_DTYPE) -> COMPLEX: ...
 
 
 @overload
-def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
+def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
     # This dtype[Any] is unfortunately necessary right now.
     # See https://github.com/zarr-developers/zarr-python/issues/2131#issuecomment-2318010899
     # for more details, but `dtype` here (which comes from `parse_dtype`)
@@ -294,7 +294,8 @@ def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any:
 
 
 def parse_fill_value(
-    fill_value: Any, dtype: BOOL_DTYPE | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | np.dtype[Any]
+    fill_value: object,
+    dtype: BOOL_DTYPE | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | np.dtype[Any],
 ) -> BOOL | INTEGER | FLOAT | COMPLEX | Any:
     """
     Parse `fill_value`, a potential fill value, into an instance of `dtype`, a data type.
@@ -333,7 +334,7 @@ def parse_fill_value(
                 raise ValueError(msg)
         msg = f"Cannot parse non-string sequence {fill_value} as a scalar with type {dtype}."
         raise TypeError(msg)
-    return dtype.type(fill_value)
+    return dtype.type(fill_value)  # type: ignore[arg-type]
 
 
 # For type checking
diff --git a/tests/v3/test_metadata/test_v2.py b/tests/v3/test_metadata/test_v2.py
index 4465a86471..3ea702eecd 100644
--- a/tests/v3/test_metadata/test_v2.py
+++ b/tests/v3/test_metadata/test_v2.py
@@ -9,9 +9,9 @@
 
     from zarr.abc.codec import Codec
 
+import numcodecs
 import pytest
 
-from zarr.codecs import GzipCodec
 from zarr.core.metadata.v2 import parse_zarr_format
 
 
@@ -26,14 +26,14 @@ def test_parse_zarr_format_invalid(data: Any) -> None:
 
 
 @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
-@pytest.mark.parametrize("filters", [(), (GzipCodec().to_dict())])
-@pytest.mark.parametrize("compressor", [None, GzipCodec().to_dict()])
+@pytest.mark.parametrize("filters", [None, (), (numcodecs.GZip(),)])
+@pytest.mark.parametrize("compressor", [None, numcodecs.GZip()])
 @pytest.mark.parametrize("fill_value", [0, 1])
 @pytest.mark.parametrize("order", ["C", "F"])
 @pytest.mark.parametrize("dimension_separator", [".", "/", None])
 def test_metadata_to_dict(
     compressor: Codec | None,
-    filters: list[Codec] | None,
+    filters: tuple[Codec] | None,
     fill_value: Any,
     order: Literal["C", "F"],
     dimension_separator: Literal[".", "/"] | None,

From 7984ea7fb873cae7a3126cf293445b6b696b35db Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 12 Sep 2024 22:48:40 +0200
Subject: [PATCH 2/4] remove unneeded `or ()` fallback for V2Filters

---
 src/zarr/core/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index b16daba0e2..aaa8d4047d 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -90,7 +90,7 @@ def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecP
         return get_pipeline_class().from_list(metadata.codecs)
     elif isinstance(metadata, ArrayV2Metadata):
         return get_pipeline_class().from_list(
-            [V2Filters(metadata.filters or ()), V2Compressor(metadata.compressor)]
+            [V2Filters(metadata.filters), V2Compressor(metadata.compressor)]
         )
     else:
         raise TypeError

From fdffbb81226515a5bfd3d74422f0c51414a3eaf8 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Fri, 13 Sep 2024 11:53:56 +0200
Subject: [PATCH 3/4] CodecPipeline no longer inherits from Metadata; drop its
 to_dict and from_dict methods

---
 src/zarr/abc/codec.py        | 12 +-----------
 src/zarr/codecs/pipeline.py  | 31 ++++---------------------------
 src/zarr/core/metadata/v3.py | 10 ++--------
 3 files changed, 7 insertions(+), 46 deletions(-)

diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
index cc32b9bcfc..d74e92464c 100644
--- a/src/zarr/abc/codec.py
+++ b/src/zarr/abc/codec.py
@@ -17,7 +17,6 @@
     from zarr.abc.store import ByteGetter, ByteSetter
     from zarr.core.array_spec import ArraySpec
     from zarr.core.chunk_grids import ChunkGrid
-    from zarr.core.common import JSON
     from zarr.core.indexing import SelectorTuple
 
 __all__ = [
@@ -242,7 +241,7 @@ async def encode_partial(
         )
 
 
-class CodecPipeline(Metadata):
+class CodecPipeline:
     """Base class for implementing CodecPipeline.
     A CodecPipeline implements the read and write paths for chunk data.
     On the read path, it is responsible for fetching chunks from a store (via ByteGetter),
@@ -402,15 +401,6 @@ async def write(
         """
         ...
 
-    @classmethod
-    def from_dict(cls, data: Iterable[JSON | Codec]) -> Self:
-        """
-        Create an instance of the model from a dictionary
-        """
-        ...
-
-        return cls(**data)
-
 
 async def _batching_helper(
     func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]],
diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py
index a1a4dbaab1..8d3e354c5e 100644
--- a/src/zarr/codecs/pipeline.py
+++ b/src/zarr/codecs/pipeline.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from collections.abc import Iterable, Iterator
 from dataclasses import dataclass
 from itertools import islice, pairwise
 from typing import TYPE_CHECKING, Any, TypeVar
@@ -15,12 +14,14 @@
     Codec,
     CodecPipeline,
 )
-from zarr.core.common import JSON, ChunkCoords, concurrent_map, parse_named_configuration
+from zarr.core.common import ChunkCoords, concurrent_map
 from zarr.core.config import config
 from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
-from zarr.registry import get_codec_class, register_pipeline
+from zarr.registry import register_pipeline
 
 if TYPE_CHECKING:
+    from collections.abc import Iterable, Iterator
+
     import numpy as np
     from typing_extensions import Self
 
@@ -68,30 +69,6 @@ class BatchedCodecPipeline(CodecPipeline):
     bytes_bytes_codecs: tuple[BytesBytesCodec, ...]
     batch_size: int
 
-    @classmethod
-    def from_dict(cls, data: Iterable[JSON | Codec], *, batch_size: int | None = None) -> Self:
-        out: list[Codec] = []
-        if not isinstance(data, Iterable):
-            raise TypeError(f"Expected iterable, got {type(data)}")
-
-        for c in data:
-            if isinstance(
-                c, ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec
-            ):  # Can't use Codec here because of mypy limitation
-                out.append(c)
-            else:
-                name_parsed, _ = parse_named_configuration(c, require_configuration=False)
-                out.append(get_codec_class(name_parsed).from_dict(c))  # type: ignore[arg-type]
-        return cls.from_list(out, batch_size=batch_size)
-
-    def to_dict(self) -> dict[str, JSON]:
-        return {
-            "array_array_codecs": tuple(c.to_dict() for c in self.array_array_codecs),
-            "array_bytes_codec": self.array_bytes_codec.to_dict(),
-            "bytes_bytes_codec": tuple(c.to_dict() for c in self.bytes_bytes_codecs),
-            "batch_size": self.batch_size,
-        }
-
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
         return type(self).from_list([c.evolve_from_array_spec(array_spec=array_spec) for c in self])
 
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 068a079f76..10047cbb93 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -19,7 +19,7 @@
 import numcodecs.abc
 import numpy as np
 
-from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec, CodecPipeline
+from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
 from zarr.core.array_spec import ArraySpec
 from zarr.core.buffer import default_buffer_prototype
 from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid
@@ -27,7 +27,7 @@
 from zarr.core.common import ZARR_JSON, parse_dtype, parse_named_configuration, parse_shapelike
 from zarr.core.config import config
 from zarr.core.metadata.common import ArrayMetadata, parse_attributes
-from zarr.registry import get_codec_class, get_pipeline_class
+from zarr.registry import get_codec_class
 
 
 def parse_zarr_format(data: object) -> Literal[3]:
@@ -240,12 +240,6 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:
         return replace(self, attributes=attributes)
 
 
-def create_pipeline(data: Iterable[Codec | JSON]) -> CodecPipeline:
-    if not isinstance(data, Iterable):
-        raise TypeError(f"Expected iterable, got {type(data)}")
-    return get_pipeline_class().from_dict(data)
-
-
 BOOL = np.bool_
 BOOL_DTYPE = np.dtypes.BoolDType
 INTEGER_DTYPE = (

From 14db760c02206613ed7d3e38f2893e858876cd87 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Fri, 13 Sep 2024 11:55:06 +0200
Subject: [PATCH 4/4] rename from_list to from_codecs

---
 src/zarr/abc/codec.py       | 6 +++---
 src/zarr/codecs/pipeline.py | 4 ++--
 src/zarr/codecs/sharding.py | 8 ++++----
 src/zarr/core/array.py      | 4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
index d74e92464c..2098d989e9 100644
--- a/src/zarr/abc/codec.py
+++ b/src/zarr/abc/codec.py
@@ -265,12 +265,12 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
 
     @classmethod
     @abstractmethod
-    def from_list(cls, codecs: Iterable[Codec]) -> Self:
-        """Creates a codec pipeline from a list of codecs.
+    def from_codecs(cls, codecs: Iterable[Codec]) -> Self:
+        """Creates a codec pipeline from an iterable of codecs.
 
         Parameters
         ----------
-        codecs : list[Codec]
+        codecs : Iterable[Codec]
 
         Returns
         -------
diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py
index 8d3e354c5e..182621c59f 100644
--- a/src/zarr/codecs/pipeline.py
+++ b/src/zarr/codecs/pipeline.py
@@ -70,10 +70,10 @@ class BatchedCodecPipeline(CodecPipeline):
     batch_size: int
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
-        return type(self).from_list([c.evolve_from_array_spec(array_spec=array_spec) for c in self])
+        return type(self).from_codecs(c.evolve_from_array_spec(array_spec=array_spec) for c in self)
 
     @classmethod
-    def from_list(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self:
+    def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self:
         array_array_codecs, array_bytes_codec, bytes_bytes_codecs = codecs_from_list(codecs)
 
         return cls(
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index 6f9df65692..3ae51ce54b 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -373,7 +373,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
 
     @property
     def codec_pipeline(self) -> CodecPipeline:
-        return get_pipeline_class().from_list(self.codecs)
+        return get_pipeline_class().from_codecs(self.codecs)
 
     def to_dict(self) -> dict[str, JSON]:
         return {
@@ -620,7 +620,7 @@ async def _decode_shard_index(
         index_array = next(
             iter(
                 await get_pipeline_class()
-                .from_list(self.index_codecs)
+                .from_codecs(self.index_codecs)
                 .decode(
                     [(index_bytes, self._get_index_chunk_spec(chunks_per_shard))],
                 )
@@ -633,7 +633,7 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
         index_bytes = next(
             iter(
                 await get_pipeline_class()
-                .from_list(self.index_codecs)
+                .from_codecs(self.index_codecs)
                 .encode(
                     [
                         (
@@ -651,7 +651,7 @@ async def _encode_shard_index(self, index: _ShardIndex) -> Buffer:
     def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int:
         return (
             get_pipeline_class()
-            .from_list(self.index_codecs)
+            .from_codecs(self.index_codecs)
             .compute_encoded_size(
                 16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard)
             )
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index aaa8d4047d..3a455b239f 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -87,9 +87,9 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
 
 def create_codec_pipeline(metadata: ArrayV2Metadata | ArrayV3Metadata) -> CodecPipeline:
     if isinstance(metadata, ArrayV3Metadata):
-        return get_pipeline_class().from_list(metadata.codecs)
+        return get_pipeline_class().from_codecs(metadata.codecs)
     elif isinstance(metadata, ArrayV2Metadata):
-        return get_pipeline_class().from_list(
+        return get_pipeline_class().from_codecs(
             [V2Filters(metadata.filters), V2Compressor(metadata.compressor)]
         )
     else: