From e642337c38a14c4430d8ca1dd912830358a4d9b2 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 30 Apr 2024 09:53:11 +0200 Subject: [PATCH 01/45] use Buffer --- src/zarr/v3/__init__.py | 1 + src/zarr/v3/abc/codec.py | 15 ++++---- src/zarr/v3/abc/store.py | 13 ++++--- src/zarr/v3/array.py | 11 +++--- src/zarr/v3/array_v2.py | 19 +++++---- src/zarr/v3/buffer.py | 70 ++++++++++++++++++++++++++++++++++ src/zarr/v3/codecs/blosc.py | 18 ++++----- src/zarr/v3/codecs/bytes.py | 13 ++++--- src/zarr/v3/codecs/crc32c_.py | 21 +++++----- src/zarr/v3/codecs/gzip.py | 15 ++++---- src/zarr/v3/codecs/pipeline.py | 8 ++-- src/zarr/v3/codecs/sharding.py | 55 +++++++++++++------------- src/zarr/v3/codecs/zstd.py | 15 ++++---- src/zarr/v3/group.py | 33 ++++++++++------ src/zarr/v3/metadata.py | 5 ++- src/zarr/v3/store/core.py | 8 ++-- src/zarr/v3/store/local.py | 23 +++++------ src/zarr/v3/store/memory.py | 18 ++++----- src/zarr/v3/store/remote.py | 6 +-- tests/v3/test_codecs.py | 33 ++++++++-------- 20 files changed, 249 insertions(+), 151 deletions(-) create mode 100644 src/zarr/v3/buffer.py diff --git a/src/zarr/v3/__init__.py b/src/zarr/v3/__init__.py index 3441fa67be..c046cc01f0 100644 --- a/src/zarr/v3/__init__.py +++ b/src/zarr/v3/__init__.py @@ -2,6 +2,7 @@ from typing import Union + import zarr.v3.codecs # noqa: F401 from zarr.v3.array import Array, AsyncArray # noqa: F401 from zarr.v3.array_v2 import ArrayV2 diff --git a/src/zarr/v3/abc/codec.py b/src/zarr/v3/abc/codec.py index d0e51ff894..796f321465 100644 --- a/src/zarr/v3/abc/codec.py +++ b/src/zarr/v3/abc/codec.py @@ -7,12 +7,13 @@ from zarr.v3.abc.metadata import Metadata from zarr.v3.common import ArraySpec +from zarr.v3.buffer import Buffer from zarr.v3.store import StorePath if TYPE_CHECKING: from typing_extensions import Self - from zarr.v3.common import BytesLike, SliceSelection + from zarr.v3.common import SliceSelection from zarr.v3.metadata import ArrayMetadata from zarr.v3.config import RuntimeConfiguration @@ -58,7 +59,7 @@ class ArrayBytesCodec(Codec): @abstractmethod async def decode( self, - chunk_array: BytesLike, + chunk_array: Buffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: @@ -70,7 +71,7 @@ async def encode( chunk_array: np.ndarray, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: pass @@ -103,17 +104,17 @@ class BytesBytesCodec(Codec): @abstractmethod async def decode( self, - chunk_array: BytesLike, + chunk_array: Buffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> BytesLike: + ) -> Buffer: pass @abstractmethod async def encode( self, - chunk_array: BytesLike, + chunk_array: Buffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: pass diff --git a/src/zarr/v3/abc/store.py b/src/zarr/v3/abc/store.py index ce5de279c4..7f9698f88b 100644 --- a/src/zarr/v3/abc/store.py +++ b/src/zarr/v3/abc/store.py @@ -2,12 +2,14 @@ from typing import List, Tuple, Optional +from zarr.v3.buffer import Buffer + class Store(ABC): @abstractmethod async def get( self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[bytes]: + ) -> Optional[Buffer]: """Retrieve the value associated with a given key. Parameters @@ -17,14 +19,14 @@ async def get( Returns ------- - bytes + Buffer """ ... 
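+        # Usage sketch (illustrative only, not part of the interface):
+        #   buf = await store.get("zarr.json")   # -> Optional[Buffer]
+        #   raw = buf.as_bytearray() if buf is not None else None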
@abstractmethod async def get_partial_values( self, key_ranges: List[Tuple[str, Tuple[int, int]]] - ) -> List[bytes]: + ) -> List[Buffer]: """Retrieve possibly partial values from given key_ranges. Parameters @@ -34,8 +36,7 @@ async def get_partial_values( Returns ------- - list[bytes] - list of values, in the order of the key_ranges, may contain null/none for missing keys + list of values, in the order of the key_ranges, may contain null/none for missing keys """ ... @@ -60,7 +61,7 @@ def supports_writes(self) -> bool: ... @abstractmethod - async def set(self, key: str, value: bytes) -> None: + async def set(self, key: str, value: Buffer) -> None: """Store a (key, value) pair. Parameters diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index c0a00a624e..54f0d03a95 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -35,6 +35,7 @@ from zarr.v3.chunk_grids import RegularChunkGrid from zarr.v3.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.v3.metadata import ArrayMetadata +from zarr.v3.buffer import as_buffer from zarr.v3.store import StoreLike, StorePath, make_store_path from zarr.v3.sync import sync @@ -150,7 +151,7 @@ async def open( assert zarr_json_bytes is not None return cls.from_dict( store_path, - json.loads(zarr_json_bytes), + json.loads(zarr_json_bytes.as_bytearray()), runtime_configuration=runtime_configuration, ) @@ -165,7 +166,7 @@ async def open_auto( if v3_metadata_bytes is not None: return cls.from_dict( store_path, - json.loads(v3_metadata_bytes), + json.loads(v3_metadata_bytes.as_bytearray()), runtime_configuration=runtime_configuration or RuntimeConfiguration(), ) else: @@ -223,7 +224,7 @@ async def getitem(self, selection: Selection): return out[()] async def _save_metadata(self) -> None: - await (self.store_path / ZARR_JSON).set(self.metadata.to_bytes()) + await (self.store_path / ZARR_JSON).set(as_buffer(self.metadata.to_bytes())) async def _read_chunk( self, @@ -392,14 +393,14 @@ async def _delete_key(key: str) -> None: ) # Write new metadata - await (self.store_path / ZARR_JSON).set(new_metadata.to_bytes()) + await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata)) return replace(self, metadata=new_metadata) async def update_attributes(self, new_attributes: Dict[str, Any]) -> AsyncArray: new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata - await (self.store_path / ZARR_JSON).set(new_metadata.to_bytes()) + await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata)) return replace(self, metadata=new_metadata) def __repr__(self): diff --git a/src/zarr/v3/array_v2.py b/src/zarr/v3/array_v2.py index f150d2dbd2..4f6cbece8c 100644 --- a/src/zarr/v3/array_v2.py +++ b/src/zarr/v3/array_v2.py @@ -23,6 +23,7 @@ from zarr.v3.config import RuntimeConfiguration from zarr.v3.indexing import BasicIndexer, all_chunk_coords, is_total_slice from zarr.v3.metadata import ArrayV2Metadata +from zarr.v3.buffer import as_buffer, as_bytearray from zarr.v3.store import StoreLike, StorePath, make_store_path from zarr.v3.sync import sync @@ -152,8 +153,10 @@ async def open_async( assert zarray_bytes is not None return cls.from_dict( store_path, - zarray_json=json.loads(zarray_bytes), - zattrs_json=json.loads(zattrs_bytes) if zattrs_bytes is not None else None, + zarray_json=json.loads(zarray_bytes.as_bytearray()), + zattrs_json=json.loads(zattrs_bytes.as_bytearray()) + if zattrs_bytes is not None + else None, runtime_configuration=runtime_configuration, ) @@ -192,7 +195,7 @@ async def 
_save_metadata(self) -> None: await (self.store_path / ZARRAY_JSON).set(self.metadata.to_bytes()) if self.attributes is not None and len(self.attributes) > 0: await (self.store_path / ZATTRS_JSON).set( - json.dumps(self.attributes).encode(), + as_buffer(json.dumps(self.attributes).encode()), ) else: await (self.store_path / ZATTRS_JSON).delete() @@ -258,7 +261,7 @@ async def _read_chunk( ): store_path = self.store_path / self._encode_chunk_key(chunk_coords) - chunk_array = await self._decode_chunk(await store_path.get()) + chunk_array = await self._decode_chunk(as_bytearray(await store_path.get())) if chunk_array is not None: tmp = chunk_array[chunk_selection] out[out_selection] = tmp @@ -359,7 +362,7 @@ async def _write_chunk( else: # writing partial chunks # read chunk first - tmp = await self._decode_chunk(await store_path.get()) + tmp = await self._decode_chunk(as_bytearray(await store_path.get())) # merge new value if tmp is None: @@ -387,7 +390,7 @@ async def _write_chunk_to_store(self, store_path: StorePath, chunk_array: np.nda if chunk_bytes is None: await store_path.delete() else: - await store_path.set(chunk_bytes) + await store_path.set(as_buffer(chunk_bytes)) async def _encode_chunk(self, chunk_array: np.ndarray) -> Optional[BytesLike]: chunk_array = chunk_array.ravel(order=self.metadata.order) @@ -506,7 +509,7 @@ async def convert_to_v3_async(self) -> Array: ) new_metadata_bytes = new_metadata.to_bytes() - await (self.store_path / ZARR_JSON).set(new_metadata_bytes) + await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata_bytes)) return Array.from_dict( store_path=self.store_path, @@ -515,7 +518,7 @@ async def convert_to_v3_async(self) -> Array: ) async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> ArrayV2: - await (self.store_path / ZATTRS_JSON).set(json.dumps(new_attributes).encode()) + await (self.store_path / ZATTRS_JSON).set(as_buffer(json.dumps(new_attributes).encode())) return replace(self, attributes=new_attributes) def update_attributes(self, new_attributes: Dict[str, Any]) -> ArrayV2: diff --git a/src/zarr/v3/buffer.py b/src/zarr/v3/buffer.py new file mode 100644 index 0000000000..669e9059c7 --- /dev/null +++ b/src/zarr/v3/buffer.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Optional +import numpy as np + + +if TYPE_CHECKING: + from typing_extensions import Self + + +class NDBuffer: + # TODO: replace np.ndarray with this n-dimensional buffer + pass + + +class Buffer(NDBuffer): + """Contiguous memory block + + We use `Buffer` throughout Zarr to represent a contiguous block of memory. + For now, we only support host memory but the plan is to support other types + of memory such as CUDA device memory. 
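+
+    A usage sketch (illustrative only, not a stable API):
+
+        buf = as_buffer(b"\x00\x01")
+        buf.as_bytearray()           # -> bytearray(b'\x00\x01')
+        buf.as_numpy_array("uint8")  # -> array([0, 1], dtype=uint8)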
+ """ + + def __init__(self, data: bytearray): + assert isinstance(data, bytearray) + self._data = data + + def as_bytearray(self) -> bytearray: + return self._data + + def as_numpy_array(self, dtype: np.DTypeLike) -> np.ndarray: + return np.frombuffer(self._data, dtype=dtype) + + def __getitem__(self, key) -> Self: + return self.__class__(self.as_bytearray().__getitem__(key)) + + def __setitem__(self, key, value) -> None: + self.as_bytearray().__setitem__(key, value) + + def __len__(self) -> int: + return len(self.as_bytearray()) + + def __add__(self, other: Buffer) -> Self: + return self.__class__(self.as_bytearray() + other.as_bytearray()) + + +def as_buffer(data: Any) -> Buffer: + if isinstance(data, Buffer): + return data + if isinstance(data, bytearray): + return Buffer(data) + if isinstance(data, bytes): + return Buffer(bytearray(data)) + if hasattr(data, "to_bytes"): + return as_buffer(data.to_bytes()) + return Buffer(bytearray(np.asarray(data))) + + +def as_bytes_wrapper(func, buf: Buffer) -> Buffer: + return as_buffer(func(buf.as_bytearray())) + + +def return_as_bytes_wrapper(func, *arg, **kwargs) -> Buffer: + return as_buffer(func(*arg, **kwargs)) + + +def as_bytearray(data: Optional[Buffer]): + if data is None: + return data + return data.as_bytearray() diff --git a/src/zarr/v3/codecs/blosc.py b/src/zarr/v3/codecs/blosc.py index 479865241f..f59e08ac17 100644 --- a/src/zarr/v3/codecs/blosc.py +++ b/src/zarr/v3/codecs/blosc.py @@ -6,17 +6,17 @@ from typing import TYPE_CHECKING, Union import numcodecs -import numpy as np from numcodecs.blosc import Blosc from zarr.v3.abc.codec import BytesBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_enum, parse_named_configuration, to_thread +from zarr.v3.buffer import Buffer, as_bytes_wrapper, return_as_bytes_wrapper if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions import Self - from zarr.v3.common import JSON, ArraySpec, BytesLike + from zarr.v3.common import JSON, ArraySpec from zarr.v3.config import RuntimeConfiguration @@ -161,20 +161,20 @@ def _blosc_codec(self) -> Blosc: async def decode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> BytesLike: - return await to_thread(self._blosc_codec.decode, chunk_bytes) + ) -> Buffer: + return await to_thread(as_bytes_wrapper, self._blosc_codec.decode, chunk_bytes) async def encode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: - chunk_array = np.frombuffer(chunk_bytes, dtype=chunk_spec.dtype) - return await to_thread(self._blosc_codec.encode, chunk_array) + ) -> Optional[Buffer]: + chunk_array = chunk_bytes.as_numpy_array(chunk_spec.dtype) + return await to_thread(return_as_bytes_wrapper, self._blosc_codec.encode, chunk_array) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError diff --git a/src/zarr/v3/codecs/bytes.py b/src/zarr/v3/codecs/bytes.py index f92fe5606d..6ae9e7ddda 100644 --- a/src/zarr/v3/codecs/bytes.py +++ b/src/zarr/v3/codecs/bytes.py @@ -10,9 +10,10 @@ from zarr.v3.abc.codec import ArrayBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_enum, parse_named_configuration +from zarr.v3.buffer import Buffer, as_buffer if TYPE_CHECKING: - from zarr.v3.common import JSON, ArraySpec, BytesLike + from zarr.v3.common import JSON, 
ArraySpec from zarr.v3.config import RuntimeConfiguration from typing_extensions import Self @@ -70,10 +71,11 @@ def _get_byteorder(self, array: np.ndarray) -> Endian: async def decode( self, - chunk_bytes: BytesLike, + chunk_bytes: Buffer, chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: + assert isinstance(chunk_bytes, Buffer) if chunk_spec.dtype.itemsize > 0: if self.endian == Endian.little: prefix = "<" @@ -82,8 +84,7 @@ async def decode( dtype = np.dtype(f"{prefix}{chunk_spec.dtype.str[1:]}") else: dtype = np.dtype(f"|{chunk_spec.dtype.str[1:]}") - print(dtype) - chunk_array = np.frombuffer(chunk_bytes, dtype) + chunk_array = chunk_bytes.as_numpy_array(dtype) # ensure correct chunk shape if chunk_array.shape != chunk_spec.shape: @@ -97,13 +98,13 @@ async def encode( chunk_array: np.ndarray, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: if chunk_array.dtype.itemsize > 1: byteorder = self._get_byteorder(chunk_array) if self.endian is not None and self.endian != byteorder: new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) chunk_array = chunk_array.astype(new_dtype) - return chunk_array.tobytes() + return as_buffer(chunk_array) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length diff --git a/src/zarr/v3/codecs/crc32c_.py b/src/zarr/v3/codecs/crc32c_.py index 555bdeae3b..5677018f5e 100644 --- a/src/zarr/v3/codecs/crc32c_.py +++ b/src/zarr/v3/codecs/crc32c_.py @@ -10,11 +10,12 @@ from zarr.v3.abc.codec import BytesBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_named_configuration +from zarr.v3.buffer import Buffer if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions import Self - from zarr.v3.common import JSON, BytesLike, ArraySpec + from zarr.v3.common import JSON, ArraySpec from zarr.v3.config import RuntimeConfiguration @@ -32,12 +33,13 @@ def to_dict(self) -> Dict[str, JSON]: async def decode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> BytesLike: - crc32_bytes = chunk_bytes[-4:] - inner_bytes = chunk_bytes[:-4] + ) -> Buffer: + data = chunk_bytes.as_bytearray() + crc32_bytes = data[-4:] + inner_bytes = data[:-4] computed_checksum = np.uint32(crc32c(inner_bytes)).tobytes() stored_checksum = bytes(crc32_bytes) @@ -46,15 +48,16 @@ async def decode( "Stored and computed checksum do not match. " + f"Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." 
) - return inner_bytes + return Buffer(inner_bytes) async def encode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: - return chunk_bytes + np.uint32(crc32c(chunk_bytes)).tobytes() + ) -> Optional[Buffer]: + bytes = chunk_bytes.as_bytearray() + return Buffer(bytes + np.uint32(crc32c(bytes)).tobytes()) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 diff --git a/src/zarr/v3/codecs/gzip.py b/src/zarr/v3/codecs/gzip.py index 478eee90c1..cc1b30b86c 100644 --- a/src/zarr/v3/codecs/gzip.py +++ b/src/zarr/v3/codecs/gzip.py @@ -7,11 +7,12 @@ from zarr.v3.abc.codec import BytesBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_named_configuration, to_thread +from zarr.v3.buffer import Buffer, as_bytes_wrapper if TYPE_CHECKING: from typing import Optional, Dict from typing_extensions import Self - from zarr.v3.common import JSON, ArraySpec, BytesLike + from zarr.v3.common import JSON, ArraySpec from zarr.v3.config import RuntimeConfiguration @@ -46,19 +47,19 @@ def to_dict(self) -> Dict[str, JSON]: async def decode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> BytesLike: - return await to_thread(GZip(self.level).decode, chunk_bytes) + ) -> Buffer: + return await to_thread(as_bytes_wrapper, GZip(self.level).decode, chunk_bytes) async def encode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: - return await to_thread(GZip(self.level).encode, chunk_bytes) + ) -> Optional[Buffer]: + return await to_thread(as_bytes_wrapper, GZip(self.level).encode, chunk_bytes) def compute_encoded_size( self, diff --git a/src/zarr/v3/codecs/pipeline.py b/src/zarr/v3/codecs/pipeline.py index 7bb872eb79..91ba1926cc 100644 --- a/src/zarr/v3/codecs/pipeline.py +++ b/src/zarr/v3/codecs/pipeline.py @@ -16,13 +16,14 @@ from zarr.v3.abc.metadata import Metadata from zarr.v3.codecs.registry import get_codec_class from zarr.v3.common import parse_named_configuration +from zarr.v3.buffer import Buffer if TYPE_CHECKING: from typing import Iterator, List, Optional, Tuple, Union from zarr.v3.store import StorePath from zarr.v3.metadata import ArrayMetadata from zarr.v3.config import RuntimeConfiguration - from zarr.v3.common import JSON, ArraySpec, BytesLike, SliceSelection + from zarr.v3.common import JSON, ArraySpec, SliceSelection @dataclass(frozen=True) @@ -149,7 +150,7 @@ def _codecs_with_resolved_metadata( async def decode( self, - chunk_bytes: BytesLike, + chunk_bytes: Buffer, array_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: @@ -188,7 +189,7 @@ async def encode( chunk_array: np.ndarray, array_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: ( aa_codecs_with_spec, ab_codec_with_spec, @@ -217,6 +218,7 @@ async def encode( return None chunk_bytes = chunk_bytes_maybe + assert isinstance(chunk_bytes, Buffer) return chunk_bytes async def encode_partial( diff --git a/src/zarr/v3/codecs/sharding.py b/src/zarr/v3/codecs/sharding.py index 0385154c0f..25e440dbc8 100644 --- a/src/zarr/v3/codecs/sharding.py +++ b/src/zarr/v3/codecs/sharding.py @@ -37,6 +37,7 @@ runtime_configuration as make_runtime_configuration, parse_codecs, ) +from zarr.v3.buffer 
import Buffer, as_buffer if TYPE_CHECKING: from typing import Awaitable, Callable, Dict, Iterator, List, Optional, Set, Tuple @@ -46,7 +47,6 @@ from zarr.v3.common import ( JSON, ChunkCoords, - BytesLike, SliceSelection, ) from zarr.v3.config import RuntimeConfiguration @@ -128,15 +128,15 @@ def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardIndex: class _ShardProxy(Mapping): index: _ShardIndex - buf: BytesLike + buf: Buffer @classmethod async def from_bytes( - cls, buf: BytesLike, codec: ShardingCodec, chunks_per_shard: ChunkCoords + cls, buf: Buffer, codec: ShardingCodec, chunks_per_shard: ChunkCoords ) -> _ShardProxy: shard_index_size = codec._shard_index_size(chunks_per_shard) obj = cls() - obj.buf = memoryview(buf) + obj.buf = buf if codec.index_location == ShardingCodecIndexLocation.start: shard_index_bytes = obj.buf[:shard_index_size] else: @@ -149,11 +149,11 @@ async def from_bytes( def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardProxy: index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() - obj.buf = memoryview(b"") + obj.buf = as_buffer(memoryview(b"")) obj.index = index return obj - def __getitem__(self, chunk_coords: ChunkCoords) -> Optional[BytesLike]: + def __getitem__(self, chunk_coords: ChunkCoords) -> Optional[Buffer]: chunk_byte_slice = self.index.get_chunk_slice(chunk_coords) if chunk_byte_slice: return self.buf[chunk_byte_slice[0] : chunk_byte_slice[1]] @@ -167,7 +167,7 @@ def __iter__(self) -> Iterator[ChunkCoords]: class _ShardBuilder(_ShardProxy): - buf: bytearray + buf: Buffer index: _ShardIndex @classmethod @@ -175,7 +175,7 @@ def merge_with_morton_order( cls, chunks_per_shard: ChunkCoords, tombstones: Set[ChunkCoords], - *shard_dicts: Mapping[ChunkCoords, BytesLike], + *shard_dicts: Mapping[ChunkCoords, Buffer], ) -> _ShardBuilder: obj = cls.create_empty(chunks_per_shard) for chunk_coords in morton_order_iter(chunks_per_shard): @@ -191,31 +191,29 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = bytearray() + obj.buf = Buffer(bytearray(0)) obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj - def append(self, chunk_coords: ChunkCoords, value: BytesLike): + def append(self, chunk_coords: ChunkCoords, value: Buffer): chunk_start = len(self.buf) chunk_length = len(value) - self.buf.extend(value) + self.buf = self.buf + value self.index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length)) async def finalize( self, index_location: ShardingCodecIndexLocation, - index_encoder: Callable[[_ShardIndex], Awaitable[BytesLike]], - ) -> BytesLike: + index_encoder: Callable[[_ShardIndex], Awaitable[Buffer]], + ) -> Buffer: index_bytes = await index_encoder(self.index) if index_location == ShardingCodecIndexLocation.start: self.index.offsets_and_lengths[..., 0] += len(index_bytes) index_bytes = await index_encoder(self.index) # encode again with corrected offsets - out_buf = bytearray(index_bytes) - out_buf.extend(self.buf) + out_buf = index_bytes + self.buf else: - out_buf = self.buf - out_buf.extend(index_bytes) - return out_buf + out_buf = self.buf + index_bytes + return as_buffer(out_buf) @dataclass(frozen=True) @@ -300,7 +298,7 @@ def validate(self, array_metadata: ArrayMetadata) -> None: async def decode( self, - shard_bytes: BytesLike, + shard_bytes: Buffer, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: @@ -375,7 +373,7 @@ async def decode_partial( all_chunk_coords = 
set(chunk_coords for chunk_coords, _, _ in indexed_chunks) # reading bytes of all requested chunks - shard_dict: Mapping[ChunkCoords, BytesLike] = {} + shard_dict: Mapping[ChunkCoords, Buffer] = {} if self._is_total_shard(all_chunk_coords, chunks_per_shard): # read entire shard shard_dict_maybe = await self._load_full_shard_maybe(store_path, chunks_per_shard) @@ -417,7 +415,7 @@ async def decode_partial( async def _read_chunk( self, - shard_dict: Mapping[ChunkCoords, Optional[BytesLike]], + shard_dict: Mapping[ChunkCoords, Optional[Buffer]], chunk_coords: ChunkCoords, chunk_selection: SliceSelection, out_selection: SliceSelection, @@ -439,7 +437,7 @@ async def encode( shard_array: np.ndarray, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) @@ -457,7 +455,7 @@ async def _write_chunk( chunk_coords: ChunkCoords, chunk_selection: SliceSelection, out_selection: SliceSelection, - ) -> Tuple[ChunkCoords, Optional[BytesLike]]: + ) -> Tuple[ChunkCoords, Optional[Buffer]]: if is_total_slice(chunk_selection, chunk_shape): chunk_array = shard_array[out_selection] else: @@ -477,7 +475,7 @@ async def _write_chunk( return (chunk_coords, None) # assembling and encoding chunks within the shard - encoded_chunks: List[Tuple[ChunkCoords, Optional[BytesLike]]] = await concurrent_map( + encoded_chunks: List[Tuple[ChunkCoords, Optional[Buffer]]] = await concurrent_map( [ (shard_array, chunk_coords, chunk_selection, out_selection) for chunk_coords, chunk_selection, out_selection in indexer @@ -527,7 +525,7 @@ async def _write_chunk( chunk_coords: ChunkCoords, chunk_selection: SliceSelection, out_selection: SliceSelection, - ) -> Tuple[ChunkCoords, Optional[BytesLike]]: + ) -> Tuple[ChunkCoords, Optional[Buffer]]: chunk_array = None if is_total_slice(chunk_selection, self.chunk_shape): chunk_array = shard_array[out_selection] @@ -557,7 +555,7 @@ async def _write_chunk( else: return (chunk_coords, None) - encoded_chunks: List[Tuple[ChunkCoords, Optional[BytesLike]]] = await concurrent_map( + encoded_chunks: List[Tuple[ChunkCoords, Optional[Buffer]]] = await concurrent_map( [ ( chunk_coords, @@ -601,7 +599,7 @@ def _is_total_shard( ) async def _decode_shard_index( - self, index_bytes: BytesLike, chunks_per_shard: ChunkCoords + self, index_bytes: Buffer, chunks_per_shard: ChunkCoords ) -> _ShardIndex: return _ShardIndex( await self.index_codecs.decode( @@ -611,13 +609,14 @@ async def _decode_shard_index( ) ) - async def _encode_shard_index(self, index: _ShardIndex) -> BytesLike: + async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: index_bytes = await self.index_codecs.encode( index.offsets_and_lengths, self._get_index_chunk_spec(index.chunks_per_shard), make_runtime_configuration("C"), ) assert index_bytes is not None + assert isinstance(index_bytes, Buffer) return index_bytes def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int: diff --git a/src/zarr/v3/codecs/zstd.py b/src/zarr/v3/codecs/zstd.py index 774bb8bdbb..ebd60d5746 100644 --- a/src/zarr/v3/codecs/zstd.py +++ b/src/zarr/v3/codecs/zstd.py @@ -8,12 +8,13 @@ from zarr.v3.abc.codec import BytesBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_named_configuration, to_thread +from zarr.v3.buffer import Buffer, as_bytes_wrapper if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions 
import Self from zarr.v3.config import RuntimeConfiguration - from zarr.v3.common import BytesLike, JSON, ArraySpec + from zarr.v3.common import JSON, ArraySpec def parse_zstd_level(data: JSON) -> int: @@ -62,19 +63,19 @@ def _decompress(self, data: bytes) -> bytes: async def decode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> BytesLike: - return await to_thread(self._decompress, chunk_bytes) + ) -> Buffer: + return await to_thread(as_bytes_wrapper, self._decompress, chunk_bytes) async def encode( self, - chunk_bytes: bytes, + chunk_bytes: Buffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> Optional[BytesLike]: - return await to_thread(self._compress, chunk_bytes) + ) -> Optional[Buffer]: + return await to_thread(as_bytes_wrapper, self._compress, chunk_bytes) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index fcd2fea215..d0f5754e6a 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -11,6 +11,7 @@ from zarr.v3.attributes import Attributes from zarr.v3.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON from zarr.v3.config import RuntimeConfiguration, SyncConfiguration +from zarr.v3.buffer import as_buffer from zarr.v3.store import StoreLike, StorePath, make_store_path from zarr.v3.sync import SyncMixin, sync @@ -113,7 +114,9 @@ async def open( # (it is optional in the case of implicit groups) zarr_json_bytes = await (store_path / ZARR_JSON).get() zarr_json = ( - json.loads(zarr_json_bytes) if zarr_json_bytes is not None else {"zarr_format": 3} + json.loads(zarr_json_bytes.as_bytearray()) + if zarr_json_bytes is not None + else {"zarr_format": 3} ) elif zarr_format == 2: @@ -123,11 +126,15 @@ async def open( (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) zgroup = ( - json.loads(json.loads(zgroup_bytes)) + json.loads(json.loads(zgroup_bytes.as_bytearray())) if zgroup_bytes is not None else {"zarr_format": 2} ) - zattrs = json.loads(json.loads(zattrs_bytes)) if zattrs_bytes is not None else {} + zattrs = ( + json.loads(json.loads(zattrs_bytes.as_bytearray())) + if zattrs_bytes is not None + else {} + ) zarr_json = {**zgroup, "attributes": zattrs} else: raise ValueError(f"unexpected zarr_format: {zarr_format}") @@ -164,7 +171,7 @@ async def getitem( "attributes": {}, } else: - zarr_json = json.loads(zarr_json_bytes) + zarr_json = json.loads(zarr_json_bytes.as_bytearray()) if zarr_json["node_type"] == "group": return type(self).from_dict(store_path, zarr_json, self.runtime_configuration) elif zarr_json["node_type"] == "array": @@ -183,9 +190,9 @@ async def getitem( ) # unpack the zarray, if this is None then we must be opening a group - zarray = json.loads(zarray_bytes) if zarray_bytes else None + zarray = json.loads(zarray_bytes.as_bytearray()) if zarray_bytes else None # unpack the zattrs, this can be None if no attrs were written - zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} + zattrs = json.loads(zattrs_bytes.as_bytearray()) if zattrs_bytes is not None else {} if zarray is not None: # TODO: update this once the V2 array support is part of the primary array class @@ -198,7 +205,7 @@ async def getitem( # implicit group? 
logger.warning("group at {} is an implicit group", store_path) zgroup = ( - json.loads(zgroup_bytes) + json.loads(zgroup_bytes.as_bytearray()) if zgroup_bytes is not None else {"zarr_format": self.metadata.zarr_format} ) @@ -221,7 +228,9 @@ async def delitem(self, key: str) -> None: async def _save_metadata(self) -> None: to_save = self.metadata.to_bytes() - awaitables = [(self.store_path / key).set(value) for key, value in to_save.items()] + awaitables = [ + (self.store_path / key).set(as_buffer(value)) for key, value in to_save.items() + ] await asyncio.gather(*awaitables) @property @@ -257,9 +266,9 @@ async def update_attributes(self, new_attributes: Dict[str, Any]): to_save = self.metadata.to_bytes() if self.metadata.zarr_format == 2: # only save the .zattrs object - await (self.store_path / ZATTRS_JSON).set(to_save[ZATTRS_JSON]) + await (self.store_path / ZATTRS_JSON).set(as_buffer(to_save[ZATTRS_JSON])) else: - await (self.store_path / ZARR_JSON).set(to_save[ZARR_JSON]) + await (self.store_path / ZARR_JSON).set(as_buffer(to_save[ZARR_JSON])) self.metadata.attributes.clear() self.metadata.attributes.update(new_attributes) @@ -383,7 +392,9 @@ async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> Group # Write new metadata to_save = new_metadata.to_bytes() - awaitables = [(self.store_path / key).set(value) for key, value in to_save.items()] + awaitables = [ + (self.store_path / key).set(as_buffer(value)) for key, value in to_save.items() + ] await asyncio.gather(*awaitables) async_group = replace(self._async_group, metadata=new_metadata) diff --git a/src/zarr/v3/metadata.py b/src/zarr/v3/metadata.py index 573b8484f0..8543bbcb77 100644 --- a/src/zarr/v3/metadata.py +++ b/src/zarr/v3/metadata.py @@ -8,6 +8,7 @@ from zarr.v3.chunk_grids import ChunkGrid, RegularChunkGrid from zarr.v3.chunk_key_encodings import ChunkKeyEncoding, parse_separator +from zarr.v3.buffer import Buffer, as_buffer if TYPE_CHECKING: @@ -291,7 +292,7 @@ def __init__( def ndim(self) -> int: return len(self.shape) - def to_bytes(self) -> bytes: + def to_bytes(self) -> Buffer: def _json_convert(o): if isinstance(o, np.dtype): if o.fields is None: @@ -300,7 +301,7 @@ def _json_convert(o): return o.descr raise TypeError - return json.dumps(self.to_dict(), default=_json_convert).encode() + return as_buffer(json.dumps(self.to_dict(), default=_json_convert).encode()) @classmethod def from_dict(cls, data: Dict[str, Any]) -> ArrayV2Metadata: diff --git a/src/zarr/v3/store/core.py b/src/zarr/v3/store/core.py index 16714d9e30..f214afe0b6 100644 --- a/src/zarr/v3/store/core.py +++ b/src/zarr/v3/store/core.py @@ -3,8 +3,8 @@ from pathlib import Path from typing import Any, Optional, Tuple, Union -from zarr.v3.common import BytesLike from zarr.v3.abc.store import Store +from zarr.v3.buffer import Buffer from zarr.v3.store.local import LocalStore @@ -25,12 +25,10 @@ def __init__(self, store: Store, path: Optional[str] = None): self.store = store self.path = path or "" - async def get( - self, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[BytesLike]: + async def get(self, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> Optional[Buffer]: return await self.store.get(self.path, byte_range) - async def set(self, value: BytesLike, byte_range: Optional[Tuple[int, int]] = None) -> None: + async def set(self, value: Buffer, byte_range: Optional[Tuple[int, int]] = None) -> None: if byte_range is not None: raise NotImplementedError("Store.set does not have partial writes yet") await 
self.store.set(self.path, value) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 8f02b904c0..502c52f155 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -6,15 +6,16 @@ from typing import Union, Optional, List, Tuple from zarr.v3.abc.store import Store -from zarr.v3.common import BytesLike, concurrent_map, to_thread +from zarr.v3.common import concurrent_map, to_thread +from zarr.v3.buffer import Buffer, as_buffer -def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes: +def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> Buffer: if byte_range is not None: start = byte_range[0] end = (start + byte_range[1]) if byte_range[1] is not None else None else: - return path.read_bytes() + return as_buffer(path.read_bytes()) with path.open("rb") as f: size = f.seek(0, io.SEEK_END) if start is not None: @@ -25,13 +26,13 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> if end is not None: if end < 0: end = size + end - return f.read(end - f.tell()) - return f.read() + return as_buffer(f.read(end - f.tell())) + return as_buffer(f.read()) def _put( path: Path, - value: BytesLike, + value: Buffer, start: Optional[int] = None, auto_mkdir: bool = True, ): @@ -40,9 +41,9 @@ def _put( if start is not None: with path.open("r+b") as f: f.seek(start) - f.write(value) + f.write(value.as_bytearray()) else: - return path.write_bytes(value) + return path.write_bytes(value.as_bytearray()) class LocalStore(Store): @@ -72,7 +73,7 @@ def __eq__(self, other: object) -> bool: async def get( self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[bytes]: + ) -> Optional[Buffer]: assert isinstance(key, str) path = self.root / key @@ -83,7 +84,7 @@ async def get( async def get_partial_values( self, key_ranges: List[Tuple[str, Tuple[int, int]]] - ) -> List[bytes]: + ) -> List[Buffer]: args = [] for key, byte_range in key_ranges: assert isinstance(key, str) @@ -94,7 +95,7 @@ async def get_partial_values( args.append((_get, path)) return await concurrent_map(args, to_thread, limit=None) # TODO: fix limit - async def set(self, key: str, value: BytesLike) -> None: + async def set(self, key: str, value: Buffer) -> None: assert isinstance(key, str) path = self.root / key await to_thread(_put, path, value) diff --git a/src/zarr/v3/store/memory.py b/src/zarr/v3/store/memory.py index afacfa4321..2b389dbef6 100644 --- a/src/zarr/v3/store/memory.py +++ b/src/zarr/v3/store/memory.py @@ -2,8 +2,8 @@ from typing import Optional, MutableMapping, List, Tuple -from zarr.v3.common import BytesLike from zarr.v3.abc.store import Store +from zarr.v3.buffer import Buffer # TODO: this store could easily be extended to wrap any MutuableMapping store from v2 @@ -13,9 +13,9 @@ class MemoryStore(Store): supports_partial_writes: bool = True supports_listing: bool = True - _store_dict: MutableMapping[str, bytes] + _store_dict: MutableMapping[str, Buffer] - def __init__(self, store_dict: Optional[MutableMapping[str, bytes]] = None): + def __init__(self, store_dict: Optional[MutableMapping[str, Buffer]] = None): self._store_dict = store_dict or {} def __str__(self) -> str: @@ -26,7 +26,7 @@ def __repr__(self) -> str: async def get( self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: assert isinstance(key, str) try: value = self._store_dict[key] @@ -38,21 +38,21 @@ async def get( async def get_partial_values( 
self, key_ranges: List[Tuple[str, Tuple[int, int]]] - ) -> List[bytes]: + ) -> List[Buffer]: raise NotImplementedError async def exists(self, key: str) -> bool: return key in self._store_dict async def set( - self, key: str, value: BytesLike, byte_range: Optional[Tuple[int, int]] = None + self, key: str, value: Buffer, byte_range: Optional[Tuple[int, int]] = None ) -> None: assert isinstance(key, str) - if not isinstance(value, (bytes, bytearray, memoryview)): - raise TypeError(f"Expected BytesLike. Got {type(value)}.") + if not isinstance(value, Buffer): + raise TypeError(f"Expected Buffer. Got {type(value)}.") if byte_range is not None: - buf = bytearray(self._store_dict[key]) + buf = self._store_dict[key] buf[byte_range[0] : byte_range[1]] = value self._store_dict[key] = buf else: diff --git a/src/zarr/v3/store/remote.py b/src/zarr/v3/store/remote.py index 0e6fc84e08..e903eab888 100644 --- a/src/zarr/v3/store/remote.py +++ b/src/zarr/v3/store/remote.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union from zarr.v3.abc.store import Store +from zarr.v3.buffer import Buffer from zarr.v3.store.core import _dereference_path -from zarr.v3.common import BytesLike if TYPE_CHECKING: @@ -52,7 +52,7 @@ def _make_fs(self) -> Tuple[AsyncFileSystem, str]: async def get( self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[BytesLike]: + ) -> Optional[Buffer]: assert isinstance(key, str) fs, root = self._make_fs() path = _dereference_path(root, key) @@ -69,7 +69,7 @@ async def get( return value async def set( - self, key: str, value: BytesLike, byte_range: Optional[Tuple[int, int]] = None + self, key: str, value: Buffer, byte_range: Optional[Tuple[int, int]] = None ) -> None: assert isinstance(key, str) fs, root = self._make_fs() diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 333c2094bf..d32cebbd6d 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -24,6 +24,7 @@ from zarr.v3.metadata import runtime_configuration from zarr.v3.abc.store import Store +from zarr.v3.buffer import as_bytearray from zarr.v3.store import MemoryStore, StorePath @@ -295,7 +296,7 @@ async def test_order( fill_value=1, ) z[:, :] = data - assert await (store / "order/0.0").get() == z._store["0.0"] + assert as_bytearray(await (store / "order/0.0").get()) == z._store["0.0"] @pytest.mark.parametrize("input_order", ["F", "C"]) @@ -671,10 +672,10 @@ async def test_zarr_compat(store: Store): assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) assert np.array_equal(data, z2[:16, :18]) - assert z2._store["0.0"] == await (store / "zarr_compat3/0.0").get() - assert z2._store["0.1"] == await (store / "zarr_compat3/0.1").get() - assert z2._store["1.0"] == await (store / "zarr_compat3/1.0").get() - assert z2._store["1.1"] == await (store / "zarr_compat3/1.1").get() + assert z2._store["0.0"] == as_bytearray(await (store / "zarr_compat3/0.0").get()) + assert z2._store["0.1"] == as_bytearray(await (store / "zarr_compat3/0.1").get()) + assert z2._store["1.0"] == as_bytearray(await (store / "zarr_compat3/1.0").get()) + assert z2._store["1.1"] == as_bytearray(await (store / "zarr_compat3/1.1").get()) @pytest.mark.asyncio @@ -705,10 +706,10 @@ async def test_zarr_compat_F(store: Store): assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) assert np.array_equal(data, z2[:16, :18]) - assert z2._store["0.0"] == await (store / "zarr_compatF3/0.0").get() - assert z2._store["0.1"] == await (store / 
"zarr_compatF3/0.1").get() - assert z2._store["1.0"] == await (store / "zarr_compatF3/1.0").get() - assert z2._store["1.1"] == await (store / "zarr_compatF3/1.1").get() + assert z2._store["0.0"] == as_bytearray(await (store / "zarr_compatF3/0.0").get()) + assert z2._store["0.1"] == as_bytearray(await (store / "zarr_compatF3/0.1").get()) + assert z2._store["1.0"] == as_bytearray(await (store / "zarr_compatF3/1.0").get()) + assert z2._store["1.1"] == as_bytearray(await (store / "zarr_compatF3/1.1").get()) @pytest.mark.asyncio @@ -738,7 +739,7 @@ async def test_dimension_names(store: Store): ) assert (await AsyncArray.open(store / "dimension_names2")).metadata.dimension_names is None - zarr_json_bytes = await (store / "dimension_names2" / "zarr.json").get() + zarr_json_bytes = as_bytearray(await (store / "dimension_names2" / "zarr.json").get()) assert zarr_json_bytes is not None assert "dimension_names" not in json.loads(zarr_json_bytes) @@ -804,7 +805,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]): fill_value=1, ) z[:, :] = data - assert await (store / "endian/0.0").get() == z._store["0.0"] + assert as_bytearray(await (store / "endian/0.0").get()) == z._store["0.0"] @pytest.mark.parametrize("dtype_input_endian", [">u2", " Date: Tue, 30 Apr 2024 13:30:17 +0200 Subject: [PATCH 02/45] use memoryview as the underlying memory --- src/zarr/v3/array.py | 4 ++-- src/zarr/v3/array_v2.py | 6 ++---- src/zarr/v3/buffer.py | 38 +++++++++++++++++++--------------- src/zarr/v3/codecs/crc32c_.py | 8 +++---- src/zarr/v3/codecs/sharding.py | 2 +- src/zarr/v3/group.py | 16 +++++++------- src/zarr/v3/store/local.py | 4 ++-- 7 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index 54f0d03a95..003fa77194 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -151,7 +151,7 @@ async def open( assert zarr_json_bytes is not None return cls.from_dict( store_path, - json.loads(zarr_json_bytes.as_bytearray()), + json.loads(zarr_json_bytes.to_bytes()), runtime_configuration=runtime_configuration, ) @@ -166,7 +166,7 @@ async def open_auto( if v3_metadata_bytes is not None: return cls.from_dict( store_path, - json.loads(v3_metadata_bytes.as_bytearray()), + json.loads(v3_metadata_bytes.to_bytes()), runtime_configuration=runtime_configuration or RuntimeConfiguration(), ) else: diff --git a/src/zarr/v3/array_v2.py b/src/zarr/v3/array_v2.py index 4f6cbece8c..2229c739ce 100644 --- a/src/zarr/v3/array_v2.py +++ b/src/zarr/v3/array_v2.py @@ -153,10 +153,8 @@ async def open_async( assert zarray_bytes is not None return cls.from_dict( store_path, - zarray_json=json.loads(zarray_bytes.as_bytearray()), - zattrs_json=json.loads(zattrs_bytes.as_bytearray()) - if zattrs_bytes is not None - else None, + zarray_json=json.loads(zarray_bytes.to_bytes()), + zattrs_json=json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else None, runtime_configuration=runtime_configuration, ) diff --git a/src/zarr/v3/buffer.py b/src/zarr/v3/buffer.py index 669e9059c7..3850eff6d4 100644 --- a/src/zarr/v3/buffer.py +++ b/src/zarr/v3/buffer.py @@ -21,50 +21,54 @@ class Buffer(NDBuffer): of memory such as CUDA device memory. 
""" - def __init__(self, data: bytearray): - assert isinstance(data, bytearray) + def __init__(self, data: memoryview): + assert isinstance(data, memoryview) + assert data.ndim == 1 + assert data.contiguous + assert data.itemsize == 1 self._data = data - def as_bytearray(self) -> bytearray: + def memoryview(self) -> memoryview: return self._data + def to_bytes(self) -> bytes: + return bytes(self.memoryview()) + def as_numpy_array(self, dtype: np.DTypeLike) -> np.ndarray: - return np.frombuffer(self._data, dtype=dtype) + return np.frombuffer(self.memoryview(), dtype=dtype) def __getitem__(self, key) -> Self: - return self.__class__(self.as_bytearray().__getitem__(key)) + return self.__class__(self.memoryview().__getitem__(key)) def __setitem__(self, key, value) -> None: - self.as_bytearray().__setitem__(key, value) + self.memoryview().__setitem__(key, value) def __len__(self) -> int: - return len(self.as_bytearray()) + return len(self.memoryview()) def __add__(self, other: Buffer) -> Self: - return self.__class__(self.as_bytearray() + other.as_bytearray()) + return self.__class__(memoryview(self.to_bytes() + other.to_bytes())) def as_buffer(data: Any) -> Buffer: if isinstance(data, Buffer): return data - if isinstance(data, bytearray): - return Buffer(data) - if isinstance(data, bytes): - return Buffer(bytearray(data)) + if isinstance(data, bytearray | bytes): + return Buffer(memoryview(data)) if hasattr(data, "to_bytes"): - return as_buffer(data.to_bytes()) - return Buffer(bytearray(np.asarray(data))) + return as_buffer(memoryview(data.to_bytes())) + return Buffer(memoryview(np.asanyarray(data).reshape(-1).view(dtype="int8"))) def as_bytes_wrapper(func, buf: Buffer) -> Buffer: - return as_buffer(func(buf.as_bytearray())) + return as_buffer(func(buf.to_bytes())) def return_as_bytes_wrapper(func, *arg, **kwargs) -> Buffer: return as_buffer(func(*arg, **kwargs)) -def as_bytearray(data: Optional[Buffer]): +def as_bytearray(data: Optional[Buffer]) -> Optional[bytes]: if data is None: return data - return data.as_bytearray() + return data.to_bytes() diff --git a/src/zarr/v3/codecs/crc32c_.py b/src/zarr/v3/codecs/crc32c_.py index 5677018f5e..ba32455199 100644 --- a/src/zarr/v3/codecs/crc32c_.py +++ b/src/zarr/v3/codecs/crc32c_.py @@ -10,7 +10,7 @@ from zarr.v3.abc.codec import BytesBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_named_configuration -from zarr.v3.buffer import Buffer +from zarr.v3.buffer import Buffer, as_buffer if TYPE_CHECKING: from typing import Dict, Optional @@ -37,7 +37,7 @@ async def decode( _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, ) -> Buffer: - data = chunk_bytes.as_bytearray() + data = chunk_bytes.memoryview() crc32_bytes = data[-4:] inner_bytes = data[:-4] @@ -56,8 +56,8 @@ async def encode( _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, ) -> Optional[Buffer]: - bytes = chunk_bytes.as_bytearray() - return Buffer(bytes + np.uint32(crc32c(bytes)).tobytes()) + checksum = crc32c(chunk_bytes.memoryview()) + return as_buffer(chunk_bytes.to_bytes() + np.uint32(checksum).tobytes()) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 diff --git a/src/zarr/v3/codecs/sharding.py b/src/zarr/v3/codecs/sharding.py index 25e440dbc8..849103a38e 100644 --- a/src/zarr/v3/codecs/sharding.py +++ b/src/zarr/v3/codecs/sharding.py @@ -191,7 +191,7 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: 
ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = Buffer(bytearray(0)) + obj.buf = Buffer(memoryview(b"")) obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index d0f5754e6a..7cf51f673e 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -114,7 +114,7 @@ async def open( # (it is optional in the case of implicit groups) zarr_json_bytes = await (store_path / ZARR_JSON).get() zarr_json = ( - json.loads(zarr_json_bytes.as_bytearray()) + json.loads(zarr_json_bytes.to_bytes()) if zarr_json_bytes is not None else {"zarr_format": 3} ) @@ -126,14 +126,12 @@ async def open( (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) zgroup = ( - json.loads(json.loads(zgroup_bytes.as_bytearray())) + json.loads(json.loads(zgroup_bytes.to_bytes())) if zgroup_bytes is not None else {"zarr_format": 2} ) zattrs = ( - json.loads(json.loads(zattrs_bytes.as_bytearray())) - if zattrs_bytes is not None - else {} + json.loads(json.loads(zattrs_bytes.to_bytes())) if zattrs_bytes is not None else {} ) zarr_json = {**zgroup, "attributes": zattrs} else: @@ -171,7 +169,7 @@ async def getitem( "attributes": {}, } else: - zarr_json = json.loads(zarr_json_bytes.as_bytearray()) + zarr_json = json.loads(zarr_json_bytes.to_bytes()) if zarr_json["node_type"] == "group": return type(self).from_dict(store_path, zarr_json, self.runtime_configuration) elif zarr_json["node_type"] == "array": @@ -190,9 +188,9 @@ async def getitem( ) # unpack the zarray, if this is None then we must be opening a group - zarray = json.loads(zarray_bytes.as_bytearray()) if zarray_bytes else None + zarray = json.loads(zarray_bytes.to_bytes()) if zarray_bytes else None # unpack the zattrs, this can be None if no attrs were written - zattrs = json.loads(zattrs_bytes.as_bytearray()) if zattrs_bytes is not None else {} + zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} if zarray is not None: # TODO: update this once the V2 array support is part of the primary array class @@ -205,7 +203,7 @@ async def getitem( # implicit group? logger.warning("group at {} is an implicit group", store_path) zgroup = ( - json.loads(zgroup_bytes.as_bytearray()) + json.loads(zgroup_bytes.to_bytes()) if zgroup_bytes is not None else {"zarr_format": self.metadata.zarr_format} ) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 502c52f155..b9ab68e53c 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -41,9 +41,9 @@ def _put( if start is not None: with path.open("r+b") as f: f.seek(start) - f.write(value.as_bytearray()) + f.write(value.memoryview()) else: - return path.write_bytes(value.as_bytearray()) + return path.write_bytes(value.memoryview()) class LocalStore(Store): From eb6d097946916e4956ee8bcece7baab2ca1d2bf7 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Wed, 1 May 2024 16:39:46 +0200 Subject: [PATCH 03/45] use NDBuffer --- src/zarr/v3/abc/codec.py | 19 +++-- src/zarr/v3/array.py | 37 ++++++---- src/zarr/v3/array_v2.py | 14 ++-- src/zarr/v3/buffer.py | 120 ++++++++++++++++++++++++++------ src/zarr/v3/codecs/bytes.py | 20 ++---- src/zarr/v3/codecs/crc32c_.py | 2 +- src/zarr/v3/codecs/pipeline.py | 11 ++- src/zarr/v3/codecs/sharding.py | 57 +++++++-------- src/zarr/v3/codecs/transpose.py | 10 +-- 9 files changed, 182 insertions(+), 108 deletions(-) diff --git a/src/zarr/v3/abc/codec.py b/src/zarr/v3/abc/codec.py index 796f321465..4dd2207e09 100644 --- a/src/zarr/v3/abc/codec.py +++ b/src/zarr/v3/abc/codec.py @@ -3,11 +3,10 @@ from abc import abstractmethod from typing import TYPE_CHECKING, Optional -import numpy as np from zarr.v3.abc.metadata import Metadata from zarr.v3.common import ArraySpec -from zarr.v3.buffer import Buffer +from zarr.v3.buffer import Buffer, NDBuffer from zarr.v3.store import StorePath @@ -39,19 +38,19 @@ class ArrayArrayCodec(Codec): @abstractmethod async def decode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> np.ndarray: + ) -> NDBuffer: pass @abstractmethod async def encode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[np.ndarray]: + ) -> Optional[NDBuffer]: pass @@ -62,13 +61,13 @@ async def decode( chunk_array: Buffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> np.ndarray: + ) -> NDBuffer: pass @abstractmethod async def encode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, ) -> Optional[Buffer]: @@ -83,7 +82,7 @@ async def decode_partial( selection: SliceSelection, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[np.ndarray]: + ) -> Optional[NDBuffer]: pass @@ -92,7 +91,7 @@ class ArrayBytesCodecPartialEncodeMixin: async def encode_partial( self, store_path: StorePath, - chunk_array: np.ndarray, + chunk_array: NDBuffer, selection: SliceSelection, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index 003fa77194..550b978f91 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -35,7 +35,7 @@ from zarr.v3.chunk_grids import RegularChunkGrid from zarr.v3.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.v3.metadata import ArrayMetadata -from zarr.v3.buffer import as_buffer +from zarr.v3.buffer import NDBuffer, as_buffer, as_nd_buffer from zarr.v3.store import StoreLike, StorePath, make_store_path from zarr.v3.sync import sync @@ -202,8 +202,8 @@ async def getitem(self, selection: Selection): ) # setup output array - out = np.zeros( - indexer.shape, + out = NDBuffer.create_zeros( + shape=indexer.shape, dtype=self.metadata.dtype, order=self.runtime_configuration.order, ) @@ -218,10 +218,11 @@ async def getitem(self, selection: Selection): self.runtime_configuration.concurrency, ) + # We always return a numpy array to the user if out.shape: - return out + return out.as_numpy_array() else: - return out[()] + return out.as_numpy_array()[()] async def _save_metadata(self) -> None: await (self.store_path / ZARR_JSON).set(as_buffer(self.metadata.to_bytes())) @@ -231,7 +232,7 @@ async def _read_chunk( chunk_coords: ChunkCoords, chunk_selection: SliceSelection, 
out_selection: SliceSelection, - out: np.ndarray, + out: NDBuffer, ): chunk_spec = self.metadata.get_chunk_spec(chunk_coords) chunk_key_encoding = self.metadata.chunk_key_encoding @@ -258,6 +259,7 @@ async def _read_chunk( out[out_selection] = self.metadata.fill_value async def setitem(self, selection: Selection, value: np.ndarray) -> None: + assert isinstance(value, np.ndarray) assert isinstance(self.metadata.chunk_grid, RegularChunkGrid) chunk_shape = self.metadata.chunk_grid.chunk_shape indexer = BasicIndexer( @@ -279,6 +281,10 @@ async def setitem(self, selection: Selection, value: np.ndarray) -> None: if value.dtype.name != self.metadata.dtype.name: value = value.astype(self.metadata.dtype, order="A") + # We accept a numpy array as input from the user and convert it to a NDBuffer. + # From this point onwards, we only pass Buffer and NDBuffer between components. + value = as_nd_buffer(value) + # merging with existing data and encoding chunks await concurrent_map( [ @@ -297,12 +303,13 @@ async def setitem(self, selection: Selection, value: np.ndarray) -> None: async def _write_chunk( self, - value: np.ndarray, + value: NDBuffer, chunk_shape: ChunkCoords, chunk_coords: ChunkCoords, chunk_selection: SliceSelection, out_selection: SliceSelection, ): + assert isinstance(value, NDBuffer) chunk_spec = self.metadata.get_chunk_spec(chunk_coords) chunk_key_encoding = self.metadata.chunk_key_encoding chunk_key = chunk_key_encoding.encode_chunk_key(chunk_coords) @@ -311,8 +318,8 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = np.empty( - chunk_shape, + chunk_array = NDBuffer.create_empty( + shape=chunk_shape, dtype=self.metadata.dtype, ) chunk_array.fill(value) @@ -336,8 +343,8 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = np.empty( - chunk_shape, + chunk_array = NDBuffer.create_empty( + shape=chunk_shape, dtype=self.metadata.dtype, ) chunk_array.fill(self.metadata.fill_value) @@ -350,9 +357,9 @@ async def _write_chunk( await self._write_chunk_to_store(store_path, chunk_array, chunk_spec) async def _write_chunk_to_store( - self, store_path: StorePath, chunk_array: np.ndarray, chunk_spec: ArraySpec + self, store_path: StorePath, chunk_array: NDBuffer, chunk_spec: ArraySpec ): - if np.all(chunk_array == self.metadata.fill_value): + if np.all(chunk_array.as_numpy_array() == self.metadata.fill_value): # chunks that only contain fill_value will be removed await store_path.delete() else: @@ -393,14 +400,14 @@ async def _delete_key(key: str) -> None: ) # Write new metadata - await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata)) + await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata.to_bytes())) return replace(self, metadata=new_metadata) async def update_attributes(self, new_attributes: Dict[str, Any]) -> AsyncArray: new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata - await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata)) + await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata.to_bytes())) return replace(self, metadata=new_metadata) def __repr__(self): diff --git a/src/zarr/v3/array_v2.py b/src/zarr/v3/array_v2.py index 2229c739ce..79a5b2ecad 100644 --- a/src/zarr/v3/array_v2.py +++ b/src/zarr/v3/array_v2.py @@ -23,7 +23,7 @@ from zarr.v3.config import RuntimeConfiguration from zarr.v3.indexing import BasicIndexer, all_chunk_coords, is_total_slice from zarr.v3.metadata import ArrayV2Metadata -from 
zarr.v3.buffer import as_buffer, as_bytearray +from zarr.v3.buffer import NDBuffer, as_buffer, as_bytearray from zarr.v3.store import StoreLike, StorePath, make_store_path from zarr.v3.sync import sync @@ -230,8 +230,8 @@ async def get_async(self, selection: Selection): ) # setup output array - out = np.zeros( - indexer.shape, + out = NDBuffer.create_zeros( + shape=indexer.shape, dtype=self.metadata.dtype, order=self.metadata.order, ) @@ -347,8 +347,8 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = np.empty( - chunk_shape, + chunk_array = NDBuffer.create_empty( + shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, ) @@ -364,8 +364,8 @@ async def _write_chunk( # merge new value if tmp is None: - chunk_array = np.empty( - chunk_shape, + chunk_array = NDBuffer.create_empty( + shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, ) diff --git a/src/zarr/v3/buffer.py b/src/zarr/v3/buffer.py index 3850eff6d4..e9affa3877 100644 --- a/src/zarr/v3/buffer.py +++ b/src/zarr/v3/buffer.py @@ -1,16 +1,83 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +import sys +from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Tuple import numpy as np if TYPE_CHECKING: from typing_extensions import Self + from zarr.v3.codecs.bytes import Endian class NDBuffer: - # TODO: replace np.ndarray with this n-dimensional buffer - pass + def __init__(self, array: np.ndarray): + assert isinstance(array, np.ndarray) + assert array.dtype != object + self._data = array + + @classmethod + def create_empty( + cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C" + ): + return cls(np.empty(shape=shape, dtype=dtype, order=order)) + + @classmethod + def create_zeros( + cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C" + ): + return cls(np.zeros(shape=shape, dtype=dtype, order=order)) + + def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: + if dtype is None: + return self._data + return self._data.astype(dtype=dtype, copy=False) + + @property + def dtype(self) -> np.DTypeLike: + return self.as_numpy_array().dtype + + @property + def shape(self) -> Tuple[int, ...]: + return self.as_numpy_array().shape + + @property + def byteorder(self) -> Endian: + from zarr.v3.codecs.bytes import Endian + + if self.dtype.byteorder == "<": + return Endian.little + elif self.dtype.byteorder == ">": + return Endian.big + else: + return Endian(sys.byteorder) + + def reshape(self, newshape: Iterable[int]) -> Self: + return self.__class__(self.as_numpy_array().reshape(newshape)) + + def astype(self, dtype: np.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: + return self.__class__(self.as_numpy_array().astype(dtype=dtype, order=order)) + + def __getitem__(self, key) -> Self: + # print("__getitem__: \n", np.asanyarray(self.as_numpy_array().__getitem__(key))) + return self.__class__(np.asanyarray(self.as_numpy_array().__getitem__(key))) + + def __setitem__(self, key, value) -> None: + if isinstance(value, NDBuffer): + value = value.as_numpy_array() + self.as_numpy_array().__setitem__(key, value) + + def __len__(self) -> int: + return self.as_numpy_array().__len__() + + def fill(self, value: Any) -> None: + self.as_numpy_array().fill(value) + + def copy(self) -> Self: + return self.__class__(self.as_numpy_array().copy()) + + def transpose(self, *axes) -> Self: + return 
self.__class__(self.as_numpy_array().transpose(*axes)) class Buffer(NDBuffer): @@ -21,43 +88,52 @@ class Buffer(NDBuffer): of memory such as CUDA device memory. """ - def __init__(self, data: memoryview): - assert isinstance(data, memoryview) - assert data.ndim == 1 - assert data.contiguous - assert data.itemsize == 1 - self._data = data + @classmethod + def create_empty( + cls, *, shape: Iterable[int], dtype: np.DTypeLike = "b", order: Literal["C", "F"] = "C" + ): + return cls(np.empty(shape=shape, dtype=dtype, order=order)) def memoryview(self) -> memoryview: - return self._data + return memoryview(self._data.reshape(-1).view(dtype="b")) + + def as_numpy_array(self, dtype: Optional[np.DTypeLike] = "b") -> np.ndarray: + return self._data.reshape(-1).view(dtype=dtype) def to_bytes(self) -> bytes: return bytes(self.memoryview()) - def as_numpy_array(self, dtype: np.DTypeLike) -> np.ndarray: - return np.frombuffer(self.memoryview(), dtype=dtype) - def __getitem__(self, key) -> Self: - return self.__class__(self.memoryview().__getitem__(key)) + return self.__class__(self.as_numpy_array().__getitem__(key)) def __setitem__(self, key, value) -> None: - self.memoryview().__setitem__(key, value) + self.as_numpy_array().__setitem__(key, value) def __len__(self) -> int: - return len(self.memoryview()) + return self._data.nbytes def __add__(self, other: Buffer) -> Self: - return self.__class__(memoryview(self.to_bytes() + other.to_bytes())) + return self.__class__(np.frombuffer(self.to_bytes() + other.to_bytes(), dtype="b")) + + +def as_nd_buffer(data: Any) -> NDBuffer: + if isinstance(data, NDBuffer): + return data + return NDBuffer(np.asanyarray(data)) + + +def as_ndarray(data: Optional[NDBuffer]) -> Optional[np.ndarray]: + if data is None: + return data + return data.as_numpy_array() def as_buffer(data: Any) -> Buffer: if isinstance(data, Buffer): return data - if isinstance(data, bytearray | bytes): - return Buffer(memoryview(data)) - if hasattr(data, "to_bytes"): - return as_buffer(memoryview(data.to_bytes())) - return Buffer(memoryview(np.asanyarray(data).reshape(-1).view(dtype="int8"))) + if isinstance(data, NDBuffer): + return Buffer(data.as_numpy_array()) + return Buffer(np.asanyarray(data)) def as_bytes_wrapper(func, buf: Buffer) -> Buffer: diff --git a/src/zarr/v3/codecs/bytes.py b/src/zarr/v3/codecs/bytes.py index 6ae9e7ddda..f0e5c04d30 100644 --- a/src/zarr/v3/codecs/bytes.py +++ b/src/zarr/v3/codecs/bytes.py @@ -10,7 +10,7 @@ from zarr.v3.abc.codec import ArrayBytesCodec from zarr.v3.codecs.registry import register_codec from zarr.v3.common import parse_enum, parse_named_configuration -from zarr.v3.buffer import Buffer, as_buffer +from zarr.v3.buffer import Buffer, NDBuffer, as_buffer if TYPE_CHECKING: from zarr.v3.common import JSON, ArraySpec @@ -61,20 +61,12 @@ def evolve(self, array_spec: ArraySpec) -> Self: ) return self - def _get_byteorder(self, array: np.ndarray) -> Endian: - if array.dtype.byteorder == "<": - return Endian.little - elif array.dtype.byteorder == ">": - return Endian.big - else: - return default_system_endian - async def decode( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> np.ndarray: + ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) if chunk_spec.dtype.itemsize > 0: if self.endian == Endian.little: @@ -84,7 +76,7 @@ async def decode( dtype = np.dtype(f"{prefix}{chunk_spec.dtype.str[1:]}") else: dtype = np.dtype(f"|{chunk_spec.dtype.str[1:]}") - chunk_array = chunk_bytes.as_numpy_array(dtype) + 
chunk_array = NDBuffer(chunk_bytes.as_numpy_array(dtype)) # ensure correct chunk shape if chunk_array.shape != chunk_spec.shape: @@ -95,13 +87,13 @@ async def decode( async def encode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, _chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, ) -> Optional[Buffer]: + assert isinstance(chunk_array, NDBuffer) if chunk_array.dtype.itemsize > 1: - byteorder = self._get_byteorder(chunk_array) - if self.endian is not None and self.endian != byteorder: + if self.endian is not None and self.endian != chunk_array.byteorder: new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) chunk_array = chunk_array.astype(new_dtype) return as_buffer(chunk_array) diff --git a/src/zarr/v3/codecs/crc32c_.py b/src/zarr/v3/codecs/crc32c_.py index ba32455199..4d174eaed0 100644 --- a/src/zarr/v3/codecs/crc32c_.py +++ b/src/zarr/v3/codecs/crc32c_.py @@ -48,7 +48,7 @@ async def decode( "Stored and computed checksum do not match. " + f"Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." ) - return Buffer(inner_bytes) + return as_buffer(inner_bytes) async def encode( self, diff --git a/src/zarr/v3/codecs/pipeline.py b/src/zarr/v3/codecs/pipeline.py index 91ba1926cc..bd68f58e36 100644 --- a/src/zarr/v3/codecs/pipeline.py +++ b/src/zarr/v3/codecs/pipeline.py @@ -1,7 +1,6 @@ from __future__ import annotations from typing import TYPE_CHECKING, Iterable -import numpy as np from dataclasses import dataclass from warnings import warn @@ -16,7 +15,7 @@ from zarr.v3.abc.metadata import Metadata from zarr.v3.codecs.registry import get_codec_class from zarr.v3.common import parse_named_configuration -from zarr.v3.buffer import Buffer +from zarr.v3.buffer import Buffer, NDBuffer if TYPE_CHECKING: from typing import Iterator, List, Optional, Tuple, Union @@ -153,7 +152,7 @@ async def decode( chunk_bytes: Buffer, array_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> np.ndarray: + ) -> NDBuffer: ( aa_codecs_with_spec, ab_codec_with_spec, @@ -177,7 +176,7 @@ async def decode_partial( selection: SliceSelection, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[np.ndarray]: + ) -> Optional[NDBuffer]: assert self.supports_partial_decode assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin) return await self.array_bytes_codec.decode_partial( @@ -186,7 +185,7 @@ async def decode_partial( async def encode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, array_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, ) -> Optional[Buffer]: @@ -224,7 +223,7 @@ async def encode( async def encode_partial( self, store_path: StorePath, - chunk_array: np.ndarray, + chunk_array: NDBuffer, selection: SliceSelection, chunk_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, diff --git a/src/zarr/v3/codecs/sharding.py b/src/zarr/v3/codecs/sharding.py index 849103a38e..fbb037ef91 100644 --- a/src/zarr/v3/codecs/sharding.py +++ b/src/zarr/v3/codecs/sharding.py @@ -37,7 +37,7 @@ runtime_configuration as make_runtime_configuration, parse_codecs, ) -from zarr.v3.buffer import Buffer, as_buffer +from zarr.v3.buffer import Buffer, NDBuffer, as_buffer, as_nd_buffer if TYPE_CHECKING: from typing import Awaitable, Callable, Dict, Iterator, List, Optional, Set, Tuple @@ -149,7 +149,7 @@ async def from_bytes( def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardProxy: index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() - obj.buf = as_buffer(memoryview(b"")) + 
obj.buf = as_buffer(np.array([], dtype="b")) obj.index = index return obj @@ -191,7 +191,7 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = Buffer(memoryview(b"")) + obj.buf = as_buffer(np.array([], dtype="b")) obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj @@ -213,7 +213,7 @@ async def finalize( out_buf = index_bytes + self.buf else: out_buf = self.buf + index_bytes - return as_buffer(out_buf) + return out_buf @dataclass(frozen=True) @@ -301,7 +301,7 @@ async def decode( shard_bytes: Buffer, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> np.ndarray: + ) -> NDBuffer: # print("decode") shard_shape = shard_spec.shape chunk_shape = self.chunk_shape @@ -314,8 +314,8 @@ async def decode( ) # setup output array - out = np.zeros( - shard_shape, + out = NDBuffer.create_zeros( + shape=shard_shape, dtype=shard_spec.dtype, order=runtime_configuration.order, ) @@ -351,7 +351,7 @@ async def decode_partial( selection: SliceSelection, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - ) -> Optional[np.ndarray]: + ) -> Optional[NDBuffer]: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) @@ -363,8 +363,8 @@ async def decode_partial( ) # setup output array - out = np.zeros( - indexer.shape, + out = NDBuffer.create_zeros( + shape=indexer.shape, dtype=shard_spec.dtype, order=runtime_configuration.order, ) @@ -410,7 +410,6 @@ async def decode_partial( self._read_chunk, runtime_configuration.concurrency, ) - return out async def _read_chunk( @@ -421,7 +420,7 @@ async def _read_chunk( out_selection: SliceSelection, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, - out: np.ndarray, + out: NDBuffer, ): chunk_spec = self._get_chunk_spec(shard_spec) chunk_bytes = shard_dict.get(chunk_coords, None) @@ -434,7 +433,7 @@ async def _read_chunk( async def encode( self, - shard_array: np.ndarray, + shard_array: NDBuffer, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, ) -> Optional[Buffer]: @@ -451,22 +450,23 @@ async def encode( ) async def _write_chunk( - shard_array: np.ndarray, + shard_array: NDBuffer, chunk_coords: ChunkCoords, chunk_selection: SliceSelection, out_selection: SliceSelection, ) -> Tuple[ChunkCoords, Optional[Buffer]]: + assert isinstance(shard_array, NDBuffer) if is_total_slice(chunk_selection, chunk_shape): chunk_array = shard_array[out_selection] else: # handling writing partial chunks - chunk_array = np.empty( - chunk_shape, + chunk_array = NDBuffer.create_empty( + shape=chunk_shape, dtype=shard_spec.dtype, ) chunk_array.fill(shard_spec.fill_value) chunk_array[chunk_selection] = shard_array[out_selection] - if not np.array_equiv(chunk_array, shard_spec.fill_value): + if not np.array_equiv(chunk_array.as_numpy_array(), shard_spec.fill_value): chunk_spec = self._get_chunk_spec(shard_spec) return ( chunk_coords, @@ -496,7 +496,7 @@ async def _write_chunk( async def encode_partial( self, store_path: StorePath, - shard_array: np.ndarray, + shard_array: NDBuffer, selection: SliceSelection, shard_spec: ArraySpec, runtime_configuration: RuntimeConfiguration, @@ -526,7 +526,6 @@ async def _write_chunk( chunk_selection: SliceSelection, out_selection: SliceSelection, ) -> Tuple[ChunkCoords, Optional[Buffer]]: - chunk_array = None if is_total_slice(chunk_selection, self.chunk_shape): chunk_array = shard_array[out_selection] else: @@ -536,8 +535,8 @@ 
async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = np.empty( - self.chunk_shape, + chunk_array = NDBuffer.create_empty( + shape=self.chunk_shape, dtype=shard_spec.dtype, ) chunk_array.fill(shard_spec.fill_value) @@ -547,7 +546,7 @@ async def _write_chunk( ).copy() # make a writable copy chunk_array[chunk_selection] = shard_array[out_selection] - if not np.array_equiv(chunk_array, shard_spec.fill_value): + if not np.array_equiv(chunk_array.as_numpy_array(), shard_spec.fill_value): return ( chunk_coords, await self.codecs.encode(chunk_array, chunk_spec, runtime_configuration), @@ -602,16 +601,18 @@ async def _decode_shard_index( self, index_bytes: Buffer, chunks_per_shard: ChunkCoords ) -> _ShardIndex: return _ShardIndex( - await self.index_codecs.decode( - index_bytes, - self._get_index_chunk_spec(chunks_per_shard), - make_runtime_configuration("C"), - ) + ( + await self.index_codecs.decode( + index_bytes, + self._get_index_chunk_spec(chunks_per_shard), + make_runtime_configuration("C"), + ) + ).as_numpy_array() ) async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: index_bytes = await self.index_codecs.encode( - index.offsets_and_lengths, + as_nd_buffer(index.offsets_and_lengths), self._get_index_chunk_spec(index.chunks_per_shard), make_runtime_configuration("C"), ) diff --git a/src/zarr/v3/codecs/transpose.py b/src/zarr/v3/codecs/transpose.py index b663230e35..b09072705e 100644 --- a/src/zarr/v3/codecs/transpose.py +++ b/src/zarr/v3/codecs/transpose.py @@ -3,6 +3,7 @@ from dataclasses import dataclass, replace +from zarr.v3.buffer import NDBuffer from zarr.v3.common import JSON, ArraySpec, ChunkCoordsLike, parse_named_configuration if TYPE_CHECKING: @@ -10,7 +11,6 @@ from typing import TYPE_CHECKING, Optional, Tuple from typing_extensions import Self -import numpy as np from zarr.v3.abc.codec import ArrayArrayCodec from zarr.v3.codecs.registry import register_codec @@ -75,10 +75,10 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: async def decode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> np.ndarray: + ) -> NDBuffer: inverse_order = [0] * chunk_spec.ndim for x, i in enumerate(self.order): inverse_order[x] = i @@ -87,10 +87,10 @@ async def decode( async def encode( self, - chunk_array: np.ndarray, + chunk_array: NDBuffer, chunk_spec: ArraySpec, _runtime_configuration: RuntimeConfiguration, - ) -> Optional[np.ndarray]: + ) -> Optional[NDBuffer]: chunk_array = chunk_array.transpose(self.order) return chunk_array From 2982c9baf5303fd21b57346bf0756f20e03ab6e0 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
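After this patch the division of labour is: codecs exchange `NDBuffer` (typed, n-dimensional), stores exchange `Buffer` (flat bytes), and numpy arrays only appear at the user-facing boundary. A rough usage sketch of the API as it stands here (illustrative only, not part of the patch series):

    import numpy as np
    from zarr.v3.buffer import NDBuffer, as_buffer, as_nd_buffer

    # chunk data travels between codecs as an NDBuffer ...
    nd = NDBuffer.create_zeros(shape=(2, 2), dtype=np.float64)
    nd.fill(1.0)

    # ... while encoded output is a flat Buffer that stores can persist
    buf = as_buffer(nd)
    assert len(buf.to_bytes()) == 2 * 2 * 8

    # user-supplied numpy arrays get wrapped at the boundary
    assert as_nd_buffer(np.ones((2, 2))).as_numpy_array().shape == (2, 2)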
Kristensen" Date: Thu, 2 May 2024 09:32:12 +0200 Subject: [PATCH 04/45] convert to Buffer for the v2 tests --- src/zarr/buffer.py | 7 +++++++ src/zarr/store/local.py | 5 +++++ src/zarr/store/memory.py | 5 ++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 644668c104..68de0a63e7 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -115,6 +115,13 @@ def __len__(self) -> int: def __add__(self, other: Buffer) -> Self: return self.__class__(np.frombuffer(self.to_bytes() + other.to_bytes(), dtype="b")) + def __eq__(self, other: Any) -> bool: + if isinstance(other, (bytes, bytearray)): + return self.to_bytes() == other + raise ValueError( + f"equal operator not supported between {self.__class__} and {other.__class__}" + ) + def as_nd_buffer(data: Any) -> NDBuffer: if isinstance(data, NDBuffer): diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index 059e122636..6e1f353f43 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -98,6 +98,11 @@ async def get_partial_values( async def set(self, key: str, value: Buffer) -> None: assert isinstance(key, str) + if isinstance(value, (bytes, bytearray)): + # TODO: to support the v2 tests, we convert bytes to Buffer here + value = as_buffer(value) + if not isinstance(value, Buffer): + raise TypeError("LocalStore.set(): `value` must a Buffer instance") path = self.root / key await to_thread(_put, path, value) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index d496c3cb5f..93d2ace150 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -3,7 +3,7 @@ from typing import Optional, MutableMapping, List, Tuple from zarr.abc.store import Store -from zarr.buffer import Buffer +from zarr.buffer import Buffer, as_buffer # TODO: this store could easily be extended to wrap any MutuableMapping store from v2 @@ -48,6 +48,9 @@ async def set( self, key: str, value: Buffer, byte_range: Optional[Tuple[int, int]] = None ) -> None: assert isinstance(key, str) + if isinstance(value, (bytes, bytearray)): + # TODO: to support the v2 tests, we convert bytes to Buffer here + value = as_buffer(value) if not isinstance(value, Buffer): raise TypeError(f"Expected Buffer. Got {type(value)}.") From 45ad25471f4fcb1f50aa924cd2da44414337aaea Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Thu, 2 May 2024 09:37:44 +0200 Subject: [PATCH 05/45] clean up --- tests/v3/test_codecs.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 9607ced37d..4f84f99436 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -296,7 +296,7 @@ async def test_order( fill_value=1, ) z[:, :] = data - assert as_bytearray(await (store / "order/0.0").get()) == z._store["0.0"] + assert (await (store / "order/0.0").get()) == z._store["0.0"] @pytest.mark.parametrize("input_order", ["F", "C"]) @@ -672,10 +672,10 @@ async def test_zarr_compat(store: Store): assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) assert np.array_equal(data, z2[:16, :18]) - assert z2._store["0.0"] == as_bytearray(await (store / "zarr_compat3/0.0").get()) - assert z2._store["0.1"] == as_bytearray(await (store / "zarr_compat3/0.1").get()) - assert z2._store["1.0"] == as_bytearray(await (store / "zarr_compat3/1.0").get()) - assert z2._store["1.1"] == as_bytearray(await (store / "zarr_compat3/1.1").get()) + assert z2._store["0.0"] == await (store / "zarr_compat3/0.0").get() + assert z2._store["0.1"] == await (store / "zarr_compat3/0.1").get() + assert z2._store["1.0"] == await (store / "zarr_compat3/1.0").get() + assert z2._store["1.1"] == await (store / "zarr_compat3/1.1").get() @pytest.mark.asyncio @@ -706,10 +706,10 @@ async def test_zarr_compat_F(store: Store): assert np.array_equal(data, await _AsyncArrayProxy(a)[:16, :18].get()) assert np.array_equal(data, z2[:16, :18]) - assert z2._store["0.0"] == as_bytearray(await (store / "zarr_compatF3/0.0").get()) - assert z2._store["0.1"] == as_bytearray(await (store / "zarr_compatF3/0.1").get()) - assert z2._store["1.0"] == as_bytearray(await (store / "zarr_compatF3/1.0").get()) - assert z2._store["1.1"] == as_bytearray(await (store / "zarr_compatF3/1.1").get()) + assert z2._store["0.0"] == await (store / "zarr_compatF3/0.0").get() + assert z2._store["0.1"] == await (store / "zarr_compatF3/0.1").get() + assert z2._store["1.0"] == await (store / "zarr_compatF3/1.0").get() + assert z2._store["1.1"] == await (store / "zarr_compatF3/1.1").get() @pytest.mark.asyncio @@ -805,7 +805,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]): fill_value=1, ) z[:, :] = data - assert as_bytearray(await (store / "endian/0.0").get()) == z._store["0.0"] + assert await (store / "endian/0.0").get() == z._store["0.0"] @pytest.mark.parametrize("dtype_input_endian", [">u2", " Date: Mon, 6 May 2024 11:57:59 +0200 Subject: [PATCH 06/45] spilling --- src/zarr/store/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 93d2ace150..c2e7cf3807 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -6,7 +6,7 @@ from zarr.buffer import Buffer, as_buffer -# TODO: this store could easily be extended to wrap any MutuableMapping store from v2 +# TODO: this store could easily be extended to wrap any MutableMapping store from v2 # When that is done, the `MemoryStore` will just be a store that wraps a dict. class MemoryStore(Store): supports_writes: bool = True From 71dcff1df83b18c186d15e35ad587e9dc6aac772 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Mon, 6 May 2024 12:03:28 +0200 Subject: [PATCH 07/45] remove return_as_bytes_wrapper --- src/zarr/buffer.py | 4 ---- src/zarr/codecs/blosc.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 68de0a63e7..1ef264d246 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -147,10 +147,6 @@ def as_bytes_wrapper(func: Callable[[bytes], bytes], buf: Buffer) -> Buffer: return as_buffer(func(buf.to_bytes())) -def return_as_bytes_wrapper(func: Callable[[Any], Any], *arg: Any, **kwargs: Any) -> Buffer: - return as_buffer(func(*arg, **kwargs)) - - def as_bytearray(data: Optional[Buffer]) -> Optional[bytes]: if data is None: return data diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 182eda3e8a..1e9d6ab153 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -9,7 +9,7 @@ from numcodecs.blosc import Blosc from zarr.abc.codec import BytesBytesCodec -from zarr.buffer import Buffer, as_bytes_wrapper, return_as_bytes_wrapper +from zarr.buffer import Buffer, as_buffer, as_bytes_wrapper from zarr.codecs.registry import register_codec from zarr.common import parse_enum, parse_named_configuration, to_thread @@ -174,7 +174,7 @@ async def encode( _runtime_configuration: RuntimeConfiguration, ) -> Optional[Buffer]: chunk_array = chunk_bytes.as_numpy_array(chunk_spec.dtype) - return await to_thread(return_as_bytes_wrapper, self._blosc_codec.encode, chunk_array) + return await to_thread(lambda: as_buffer(self._blosc_codec.encode(chunk_array))) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError From 48edc4e4750623430e633042afe0079cdb6ab7c1 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 6 May 2024 12:06:30 +0200 Subject: [PATCH 08/45] remove as_ndarray --- src/zarr/buffer.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 1ef264d246..5b4275cf84 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -129,12 +129,6 @@ def as_nd_buffer(data: Any) -> NDBuffer: return NDBuffer(np.asanyarray(data)) -def as_ndarray(data: Optional[NDBuffer]) -> Optional[np.ndarray]: - if data is None: - return data - return data.as_numpy_array() - - def as_buffer(data: Any) -> Buffer: if isinstance(data, Buffer): return data From 5a83442ac7ee22661e1334e1161cfcb5c1a21381 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 6 May 2024 12:12:29 +0200 Subject: [PATCH 09/45] doc --- src/zarr/buffer.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 5b4275cf84..14378dfd89 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -11,6 +11,13 @@ class NDBuffer: + """A n-dimensional memory block + + We use `NDBuffer` throughout Zarr to represent a block of memory. + For now, we only support host memory but the plan is to support other types + of memory such as CUDA device memory. + """ + def __init__(self, array: np.ndarray): assert isinstance(array, np.ndarray) assert array.dtype != object @@ -81,12 +88,7 @@ def transpose(self, *axes: np.SupportsIndex) -> Self: class Buffer(NDBuffer): - """Contiguous memory block - - We use `Buffer` throughout Zarr to represent a contiguous block of memory. - For now, we only support host memory but the plan is to support other types - of memory such as CUDA device memory. 
- """ + """A flat contiguous version of `NDBuffer` with an item size of 1""" @classmethod def create_empty( From e6d49f39f317c7bd54334ee57f7cc07527a424fc Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 6 May 2024 12:14:01 +0200 Subject: [PATCH 10/45] clean up --- src/zarr/buffer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 14378dfd89..04774963bb 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -66,7 +66,6 @@ def astype(self, dtype: np.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") return self.__class__(self.as_numpy_array().astype(dtype=dtype, order=order)) def __getitem__(self, key: Any) -> Self: - # print("__getitem__: \n", np.asanyarray(self.as_numpy_array().__getitem__(key))) return self.__class__(np.asanyarray(self.as_numpy_array().__getitem__(key))) def __setitem__(self, key: Any, value: Any) -> None: From 009ad29c403420d31fc017e510cecc7529896bc8 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 8 May 2024 13:39:15 +0200 Subject: [PATCH 11/45] as_buffer(): handle bytes like --- src/zarr/buffer.py | 4 +++- src/zarr/group.py | 6 +++--- src/zarr/testing/store.py | 21 +++++++++++---------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 04774963bb..c753c85b5c 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -41,7 +41,7 @@ def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: return self._data.astype(dtype=dtype, copy=False) @property - def dtype(self) -> np.DTypeLike: + def dtype(self) -> np.dtype[Any]: return self.as_numpy_array().dtype @property @@ -135,6 +135,8 @@ def as_buffer(data: Any) -> Buffer: return data if isinstance(data, NDBuffer): return Buffer(data.as_numpy_array()) + if isinstance(data, (bytes, bytearray, memoryview)): + return Buffer(np.frombuffer(data, dtype="b")) return Buffer(np.asanyarray(data)) diff --git a/src/zarr/group.py b/src/zarr/group.py index b7cf82a21d..852ab2c7ff 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -148,13 +148,13 @@ async def open( if zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects assert zgroup_bytes is not None - zgroup = json.loads(zgroup_bytes) - zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} + zgroup = json.loads(zgroup_bytes.to_bytes()) + zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} group_metadata = {**zgroup, "attributes": zattrs} else: # V3 groups are comprised of a zarr.json object assert zarr_json_bytes is not None - group_metadata = json.loads(zarr_json_bytes) + group_metadata = json.loads(zarr_json_bytes.to_bytes()) return cls.from_dict(store_path, group_metadata, runtime_configuration) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 601ef7f393..48f2ce46b0 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -1,6 +1,7 @@ import pytest from zarr.abc.store import Store +from zarr.buffer import as_buffer class StoreTests: @@ -25,14 +26,14 @@ def test_store_capabilities(self, store: Store) -> None: @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) async def test_set_get_bytes_roundtrip(self, store: Store, key: str, data: bytes) -> None: - await store.set(key, data) + await store.set(key, as_buffer(data)) assert await store.get(key) == data @pytest.mark.parametrize("key", ["foo/c/0"]) @pytest.mark.parametrize("data", 
[b"\x01\x02\x03\x04", b""]) async def test_get_partial_values(self, store: Store, key: str, data: bytes) -> None: # put all of the data - await store.set(key, data) + await store.set(key, as_buffer(data)) # read back just part of it vals = await store.get_partial_values([(key, (0, 2))]) assert vals == [data[0:2]] @@ -43,18 +44,18 @@ async def test_get_partial_values(self, store: Store, key: str, data: bytes) -> async def test_exists(self, store: Store) -> None: assert not await store.exists("foo") - await store.set("foo/zarr.json", b"bar") + await store.set("foo/zarr.json", as_buffer(b"bar")) assert await store.exists("foo/zarr.json") async def test_delete(self, store: Store) -> None: - await store.set("foo/zarr.json", b"bar") + await store.set("foo/zarr.json", as_buffer(b"bar")) assert await store.exists("foo/zarr.json") await store.delete("foo/zarr.json") assert not await store.exists("foo/zarr.json") async def test_list(self, store: Store) -> None: assert [k async for k in store.list()] == [] - await store.set("foo/zarr.json", b"bar") + await store.set("foo/zarr.json", as_buffer(b"bar")) keys = [k async for k in store.list()] assert keys == ["foo/zarr.json"], keys @@ -62,7 +63,7 @@ async def test_list(self, store: Store) -> None: for i in range(10): key = f"foo/c/{i}" expected.append(key) - await store.set(f"foo/c/{i}", i.to_bytes(length=3, byteorder="little")) + await store.set(f"foo/c/{i}", as_buffer(i.to_bytes(length=3, byteorder="little"))) async def test_list_prefix(self, store: Store) -> None: # TODO: we currently don't use list_prefix anywhere @@ -71,11 +72,11 @@ async def test_list_prefix(self, store: Store) -> None: async def test_list_dir(self, store: Store) -> None: assert [k async for k in store.list_dir("")] == [] assert [k async for k in store.list_dir("foo")] == [] - await store.set("foo/zarr.json", b"bar") - await store.set("foo/c/1", b"\x01") + await store.set("foo/zarr.json", as_buffer(b"bar")) + await store.set("foo/c/1", as_buffer(b"\x01")) keys = [k async for k in store.list_dir("foo")] - assert keys == ["zarr.json", "c"], keys + assert set(keys) == set(["zarr.json", "c"]), keys keys = [k async for k in store.list_dir("foo/")] - assert keys == ["zarr.json", "c"], keys + assert set(keys) == set(["zarr.json", "c"]), keys From c189a4f14bb49a27b9fc981c4b88367351b5c52c Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Mon, 13 May 2024 10:58:05 +0200 Subject: [PATCH 12/45] removed sync.py again --- src/zarr/v3/sync.py | 131 -------------------------------------------- 1 file changed, 131 deletions(-) delete mode 100644 src/zarr/v3/sync.py diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py deleted file mode 100644 index a152030e89..0000000000 --- a/src/zarr/v3/sync.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import annotations -from typing import TYPE_CHECKING, TypeVar - -if TYPE_CHECKING: - from typing import Any, AsyncIterator, Coroutine - -import asyncio -from concurrent.futures import wait -import threading - -from typing_extensions import ParamSpec - -from zarr.config import SyncConfiguration - -P = ParamSpec("P") -T = TypeVar("T") - -# From https://github.com/fsspec/filesystem_spec/blob/master/fsspec/asyn.py - -iothread: list[threading.Thread | None] = [None] # dedicated IO thread -loop: list[asyncio.AbstractEventLoop | None] = [ - None -] # global event loop for any non-async instance -_lock: threading.Lock | None = None # global lock placeholder -get_running_loop = asyncio.get_running_loop - - -class SyncError(Exception): - pass - - -def _get_lock() -> threading.Lock: - """Allocate or return a threading lock. - - The lock is allocated on first use to allow setting one lock per forked process. - """ - global _lock - if not _lock: - _lock = threading.Lock() - return _lock - - -async def _runner(coro: Coroutine[Any, Any, T]) -> T | BaseException: - """ - Await a coroutine and return the result of running it. If awaiting the coroutine raises an - exception, the exception will be returned. - """ - try: - return await coro - except Exception as ex: - return ex - - -def sync( - coro: Coroutine[Any, Any, T], - loop: asyncio.AbstractEventLoop | None = None, - timeout: float | None = None, -) -> T: - """ - Make loop run coroutine until it returns. Runs in other thread - - Examples - -------- - >>> sync(async_function(), existing_loop) - """ - if loop is None: - # NB: if the loop is not running *yet*, it is OK to submit work - # and we will wait for it - loop = _get_loop() - if not isinstance(loop, asyncio.AbstractEventLoop): - raise TypeError(f"loop cannot be of type {type(loop)}") - if loop.is_closed(): - raise RuntimeError("Loop is not running") - try: - loop0 = asyncio.events.get_running_loop() - if loop0 is loop: - raise SyncError("Calling sync() from within a running loop") - except RuntimeError: - pass - - future = asyncio.run_coroutine_threadsafe(_runner(coro), loop) - - finished, unfinished = wait([future], return_when=asyncio.ALL_COMPLETED, timeout=timeout) - if len(unfinished) > 0: - raise asyncio.TimeoutError(f"Coroutine {coro} failed to finish in within {timeout}s") - assert len(finished) == 1 - return_result = list(finished)[0].result() - - if isinstance(return_result, BaseException): - raise return_result - else: - return return_result - - -def _get_loop() -> asyncio.AbstractEventLoop: - """Create or return the default fsspec IO loop - - The loop will be running on a separate thread. 
- """ - if loop[0] is None: - with _get_lock(): - # repeat the check just in case the loop got filled between the - # previous two calls from another thread - if loop[0] is None: - new_loop = asyncio.new_event_loop() - loop[0] = new_loop - th = threading.Thread(target=new_loop.run_forever, name="zarrIO") - th.daemon = True - th.start() - iothread[0] = th - assert loop[0] is not None - return loop[0] - - -class SyncMixin: - _sync_configuration: SyncConfiguration - - def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: - # TODO: refactor this to to take *args and **kwargs and pass those to the method - # this should allow us to better type the sync wrapper - return sync( - coroutine, - loop=self._sync_configuration.asyncio_loop, - timeout=self._sync_configuration.timeout, - ) - - def _sync_iter(self, async_iterator: AsyncIterator[T]) -> list[T]: - async def iter_to_list() -> list[T]: - return [item async for item in async_iterator] - - return self._sync(iter_to_list()) From 7cb9346780c40786f07778ddbb12d7c83e41eecc Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 12:27:36 +0200 Subject: [PATCH 13/45] separate Buffer and NNBuffer --- src/zarr/buffer.py | 75 +++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index c753c85b5c..1a2419ab69 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -10,6 +10,43 @@ from zarr.codecs.bytes import Endian +class Buffer: + """A flat contiguous version of `NDBuffer` with an item size of 1""" + + def __init__(self, array: np.ndarray): + assert isinstance(array, np.ndarray) + assert array.dtype != object + self._data = array + + def memoryview(self) -> memoryview: + return memoryview(self._data.reshape(-1).view(dtype="b")) + + def as_numpy_array(self, dtype: Optional[np.DTypeLike] = "b") -> np.ndarray: + return self._data.reshape(-1).view(dtype=dtype) + + def to_bytes(self) -> bytes: + return bytes(self.memoryview()) + + def __getitem__(self, key: Any) -> Self: + return self.__class__(self.as_numpy_array().__getitem__(key)) + + def __setitem__(self, key: Any, value: Any) -> None: + self.as_numpy_array().__setitem__(key, value) + + def __len__(self) -> int: + return self._data.nbytes + + def __add__(self, other: Buffer) -> Self: + return self.__class__(np.frombuffer(self.to_bytes() + other.to_bytes(), dtype="b")) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, (bytes, bytearray)): + return self.to_bytes() == other + raise ValueError( + f"equal operator not supported between {self.__class__} and {other.__class__}" + ) + + class NDBuffer: """A n-dimensional memory block @@ -86,44 +123,6 @@ def transpose(self, *axes: np.SupportsIndex) -> Self: return self.__class__(self.as_numpy_array().transpose(*axes)) -class Buffer(NDBuffer): - """A flat contiguous version of `NDBuffer` with an item size of 1""" - - @classmethod - def create_empty( - cls, *, shape: Iterable[int], dtype: np.DTypeLike = "b", order: Literal["C", "F"] = "C" - ) -> Self: - return cls(np.empty(shape=shape, dtype=dtype, order=order)) - - def memoryview(self) -> memoryview: - return memoryview(self._data.reshape(-1).view(dtype="b")) - - def as_numpy_array(self, dtype: Optional[np.DTypeLike] = "b") -> np.ndarray: - return self._data.reshape(-1).view(dtype=dtype) - - def to_bytes(self) -> bytes: - return bytes(self.memoryview()) - - def __getitem__(self, key: Any) -> Self: - return self.__class__(self.as_numpy_array().__getitem__(key)) - - def 
__setitem__(self, key: Any, value: Any) -> None: - self.as_numpy_array().__setitem__(key, value) - - def __len__(self) -> int: - return self._data.nbytes - - def __add__(self, other: Buffer) -> Self: - return self.__class__(np.frombuffer(self.to_bytes() + other.to_bytes(), dtype="b")) - - def __eq__(self, other: Any) -> bool: - if isinstance(other, (bytes, bytearray)): - return self.to_bytes() == other - raise ValueError( - f"equal operator not supported between {self.__class__} and {other.__class__}" - ) - - def as_nd_buffer(data: Any) -> NDBuffer: if isinstance(data, NDBuffer): return data From 2ba8510fbbfcaf26e977a893fd9d61befd8a27e2 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 13:19:12 +0200 Subject: [PATCH 14/45] impl. NDBuffer.from_numpy_array() --- src/zarr/array.py | 4 ++-- src/zarr/buffer.py | 10 ++++------ src/zarr/codecs/sharding.py | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 6d606a14ee..8b6f3312d6 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -20,7 +20,7 @@ # from zarr.array_v2 import ArrayV2 -from zarr.buffer import NDBuffer, as_buffer, as_nd_buffer +from zarr.buffer import NDBuffer, as_buffer from zarr.codecs import BytesCodec from zarr.common import ( ZARR_JSON, @@ -269,7 +269,7 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None: # We accept a numpy array as input from the user and convert it to a NDBuffer. # From this point onwards, we only pass Buffer and NDBuffer between components. - value = as_nd_buffer(value) + value = NDBuffer.from_numpy_array(value) # merging with existing data and encoding chunks await concurrent_map( diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 1a2419ab69..727ad79b96 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -60,6 +60,10 @@ def __init__(self, array: np.ndarray): assert array.dtype != object self._data = array + @classmethod + def from_numpy_array(cls, array: np.ArrayLike) -> Self: + return cls(np.asanyarray(array)) + @classmethod def create_empty( cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C" @@ -123,12 +127,6 @@ def transpose(self, *axes: np.SupportsIndex) -> Self: return self.__class__(self.as_numpy_array().transpose(*axes)) -def as_nd_buffer(data: Any) -> NDBuffer: - if isinstance(data, NDBuffer): - return data - return NDBuffer(np.asanyarray(data)) - - def as_buffer(data: Any) -> Buffer: if isinstance(data, Buffer): return data diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 966033d63a..b16a35e66f 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -37,7 +37,7 @@ ArrayMetadata, parse_codecs, ) -from zarr.buffer import Buffer, NDBuffer, as_buffer, as_nd_buffer +from zarr.buffer import Buffer, NDBuffer, as_buffer if TYPE_CHECKING: from typing import Awaitable, Callable, Dict, Iterator, List, Optional, Set, Tuple @@ -603,7 +603,7 @@ async def _decode_shard_index( async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: index_bytes = await self.index_codecs.encode( - as_nd_buffer(index.offsets_and_lengths), + NDBuffer.from_numpy_array(index.offsets_and_lengths), self._get_index_chunk_spec(index.chunks_per_shard), ) assert index_bytes is not None From fccd95664c0c513bc53a865d02668a1573f267d4 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
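The classmethod introduced here replaces the module-level `as_nd_buffer()` helper on the write path; a minimal sketch of the new entry point (illustrative only):

    import numpy as np
    from zarr.buffer import NDBuffer

    value = np.arange(6, dtype="uint32").reshape(2, 3)
    nd = NDBuffer.from_numpy_array(value)  # np.asanyarray: no copy for an existing ndarray
    assert nd.shape == (2, 3)
    assert nd.dtype == np.dtype("uint32")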
Kristensen" Date: Mon, 13 May 2024 14:12:23 +0200 Subject: [PATCH 15/45] remove as_buffer() --- src/zarr/array.py | 8 ++++---- src/zarr/array_v2.py | 12 +++++++----- src/zarr/buffer.py | 34 +++++++++++++++++++--------------- src/zarr/codecs/blosc.py | 10 +++++++--- src/zarr/codecs/bytes.py | 4 ++-- src/zarr/codecs/crc32c_.py | 6 +++--- src/zarr/codecs/sharding.py | 6 +++--- src/zarr/group.py | 10 +++++----- src/zarr/metadata.py | 4 ++-- src/zarr/store/local.py | 10 +++++----- src/zarr/store/memory.py | 4 ++-- src/zarr/testing/store.py | 20 +++++++++++--------- tests/v3/test_group.py | 4 ++-- 13 files changed, 72 insertions(+), 60 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 8b6f3312d6..8f5be1b223 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -20,7 +20,7 @@ # from zarr.array_v2 import ArrayV2 -from zarr.buffer import NDBuffer, as_buffer +from zarr.buffer import Buffer, NDBuffer from zarr.codecs import BytesCodec from zarr.common import ( ZARR_JSON, @@ -216,7 +216,7 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]: return out.as_numpy_array()[()] async def _save_metadata(self) -> None: - await (self.store_path / ZARR_JSON).set(as_buffer(self.metadata.to_bytes())) + await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(self.metadata.to_bytes())) async def _read_chunk( self, @@ -382,14 +382,14 @@ async def _delete_key(key: str) -> None: ) # Write new metadata - await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata.to_bytes())) + await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(new_metadata.to_bytes())) return replace(self, metadata=new_metadata) async def update_attributes(self, new_attributes: Dict[str, Any]) -> AsyncArray: new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata - await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata.to_bytes())) + await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(new_metadata.to_bytes())) return replace(self, metadata=new_metadata) def __repr__(self): diff --git a/src/zarr/array_v2.py b/src/zarr/array_v2.py index 4c6576d3ed..2254b0e7c3 100644 --- a/src/zarr/array_v2.py +++ b/src/zarr/array_v2.py @@ -10,7 +10,7 @@ from numcodecs.compat import ensure_bytes, ensure_ndarray -from zarr.buffer import NDBuffer, as_buffer, as_bytearray +from zarr.buffer import Buffer, NDBuffer, as_bytearray from zarr.common import ( ZARRAY_JSON, ZATTRS_JSON, @@ -180,7 +180,7 @@ async def _save_metadata(self) -> None: await (self.store_path / ZARRAY_JSON).set(self.metadata.to_bytes()) if self.attributes is not None and len(self.attributes) > 0: await (self.store_path / ZATTRS_JSON).set( - as_buffer(json.dumps(self.attributes).encode()), + Buffer.from_bytes(json.dumps(self.attributes).encode()), ) else: await (self.store_path / ZATTRS_JSON).delete() @@ -375,7 +375,7 @@ async def _write_chunk_to_store(self, store_path: StorePath, chunk_array: np.nda if chunk_bytes is None: await store_path.delete() else: - await store_path.set(as_buffer(chunk_bytes)) + await store_path.set(Buffer.from_bytes(chunk_bytes)) async def _encode_chunk(self, chunk_array: np.ndarray) -> Optional[BytesLike]: chunk_array = chunk_array.ravel(order=self.metadata.order) @@ -494,7 +494,7 @@ async def convert_to_v3_async(self) -> Array: ) new_metadata_bytes = new_metadata.to_bytes() - await (self.store_path / ZARR_JSON).set(as_buffer(new_metadata_bytes)) + await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(new_metadata_bytes)) return Array.from_dict( store_path=self.store_path, @@ -502,7 
+502,9 @@ async def convert_to_v3_async(self) -> Array: ) async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> ArrayV2: - await (self.store_path / ZATTRS_JSON).set(as_buffer(json.dumps(new_attributes).encode())) + await (self.store_path / ZATTRS_JSON).set( + Buffer.from_bytes(json.dumps(new_attributes).encode()) + ) return replace(self, attributes=new_attributes) def update_attributes(self, new_attributes: Dict[str, Any]) -> ArrayV2: diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 727ad79b96..b8d7ba93b5 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Tuple import numpy as np +from zarr.common import BytesLike + if TYPE_CHECKING: from typing_extensions import Self @@ -15,18 +17,30 @@ class Buffer: def __init__(self, array: np.ndarray): assert isinstance(array, np.ndarray) - assert array.dtype != object + self._data = array + @classmethod + def create_empty(cls, *, nbytes: int) -> Self: + return cls(np.empty(shape=(nbytes,), dtype="b")) + + @classmethod + def from_bytes(cls, data: BytesLike) -> Self: + return cls(np.frombuffer(data, dtype="b")) + + @classmethod + def from_nd_buffer(cls, nd_buffer: NDBuffer) -> Self: + return cls(np.frombuffer(nd_buffer.as_numpy_array().reshape(-1), dtype="b")) + + def to_bytes(self) -> bytes: + return bytes(self.memoryview()) + def memoryview(self) -> memoryview: return memoryview(self._data.reshape(-1).view(dtype="b")) def as_numpy_array(self, dtype: Optional[np.DTypeLike] = "b") -> np.ndarray: return self._data.reshape(-1).view(dtype=dtype) - def to_bytes(self) -> bytes: - return bytes(self.memoryview()) - def __getitem__(self, key: Any) -> Self: return self.__class__(self.as_numpy_array().__getitem__(key)) @@ -127,18 +141,8 @@ def transpose(self, *axes: np.SupportsIndex) -> Self: return self.__class__(self.as_numpy_array().transpose(*axes)) -def as_buffer(data: Any) -> Buffer: - if isinstance(data, Buffer): - return data - if isinstance(data, NDBuffer): - return Buffer(data.as_numpy_array()) - if isinstance(data, (bytes, bytearray, memoryview)): - return Buffer(np.frombuffer(data, dtype="b")) - return Buffer(np.asanyarray(data)) - - def as_bytes_wrapper(func: Callable[[bytes], bytes], buf: Buffer) -> Buffer: - return as_buffer(func(buf.to_bytes())) + return Buffer.from_bytes(func(buf.to_bytes())) def as_bytearray(data: Optional[Buffer]) -> Optional[bytes]: diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 890c6e172e..ff52dba061 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -9,7 +9,7 @@ from numcodecs.blosc import Blosc from zarr.abc.codec import BytesBytesCodec -from zarr.buffer import Buffer, as_buffer, as_bytes_wrapper +from zarr.buffer import Buffer, as_bytes_wrapper from zarr.codecs.registry import register_codec from zarr.common import parse_enum, parse_named_configuration, to_thread @@ -170,8 +170,12 @@ async def encode( chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Optional[Buffer]: - chunk_array = chunk_bytes.as_numpy_array(chunk_spec.dtype) - return await to_thread(lambda: as_buffer(self._blosc_codec.encode(chunk_array))) + # Since blosc only takes bytes, we convert the input and output of the encoding + # between bytes and Buffer + return await to_thread( + lambda chunk: Buffer.from_bytes(self._blosc_codec.encode(chunk.memoryview())), + chunk_bytes, + ) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise 
NotImplementedError diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index e6c44fee21..3a18442bab 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -8,7 +8,7 @@ import numpy as np from zarr.abc.codec import ArrayBytesCodec -from zarr.buffer import Buffer, NDBuffer, as_buffer +from zarr.buffer import Buffer, NDBuffer from zarr.codecs.registry import register_codec from zarr.common import parse_enum, parse_named_configuration @@ -93,7 +93,7 @@ async def encode( if self.endian is not None and self.endian != chunk_array.byteorder: new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) chunk_array = chunk_array.astype(new_dtype) - return as_buffer(chunk_array) + return Buffer.from_nd_buffer(chunk_array) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index 7a37a3353a..ac389c1406 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -8,7 +8,7 @@ from crc32c import crc32c from zarr.abc.codec import BytesBytesCodec -from zarr.buffer import Buffer, as_buffer +from zarr.buffer import Buffer from zarr.codecs.registry import register_codec from zarr.common import parse_named_configuration @@ -46,7 +46,7 @@ async def decode( "Stored and computed checksum do not match. " + f"Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." ) - return as_buffer(inner_bytes) + return Buffer.from_bytes(inner_bytes) async def encode( self, @@ -54,7 +54,7 @@ async def encode( _chunk_spec: ArraySpec, ) -> Optional[Buffer]: checksum = crc32c(chunk_bytes.memoryview()) - return as_buffer(chunk_bytes.to_bytes() + np.uint32(checksum).tobytes()) + return Buffer.from_bytes(chunk_bytes.to_bytes() + np.uint32(checksum).tobytes()) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index b16a35e66f..8aaab78703 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -37,7 +37,7 @@ ArrayMetadata, parse_codecs, ) -from zarr.buffer import Buffer, NDBuffer, as_buffer +from zarr.buffer import Buffer, NDBuffer if TYPE_CHECKING: from typing import Awaitable, Callable, Dict, Iterator, List, Optional, Set, Tuple @@ -148,7 +148,7 @@ async def from_bytes( def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardProxy: index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() - obj.buf = as_buffer(np.array([], dtype="b")) + obj.buf = Buffer.create_empty(nbytes=0) obj.index = index return obj @@ -190,7 +190,7 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = as_buffer(np.array([], dtype="b")) + obj.buf = Buffer.create_empty(nbytes=0) obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj diff --git a/src/zarr/group.py b/src/zarr/group.py index 2b1636a3f6..449c6d1333 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -6,7 +6,7 @@ import json import logging -from zarr.buffer import as_buffer +from zarr.buffer import Buffer if TYPE_CHECKING: from typing import ( @@ -240,7 +240,7 @@ async def delitem(self, key: str) -> None: async def _save_metadata(self) -> None: to_save = self.metadata.to_bytes() awaitables = [ - (self.store_path / key).set(as_buffer(value)) for key, value in to_save.items() + (self.store_path / key).set(Buffer.from_bytes(value)) for key, value in 
to_save.items() ] await asyncio.gather(*awaitables) @@ -273,9 +273,9 @@ async def update_attributes(self, new_attributes: dict[str, Any]): to_save = self.metadata.to_bytes() if self.metadata.zarr_format == 2: # only save the .zattrs object - await (self.store_path / ZATTRS_JSON).set(as_buffer(to_save[ZATTRS_JSON])) + await (self.store_path / ZATTRS_JSON).set(Buffer.from_bytes(to_save[ZATTRS_JSON])) else: - await (self.store_path / ZARR_JSON).set(as_buffer(to_save[ZARR_JSON])) + await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(to_save[ZARR_JSON])) self.metadata.attributes.clear() self.metadata.attributes.update(new_attributes) @@ -444,7 +444,7 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group # Write new metadata to_save = new_metadata.to_bytes() awaitables = [ - (self.store_path / key).set(as_buffer(value)) for key, value in to_save.items() + (self.store_path / key).set(Buffer.from_bytes(value)) for key, value in to_save.items() ] await asyncio.gather(*awaitables) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 7a49e330dc..098ab34b86 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -6,7 +6,7 @@ import numpy as np import numpy.typing as npt -from zarr.buffer import Buffer, as_buffer +from zarr.buffer import Buffer from zarr.chunk_grids import ChunkGrid, RegularChunkGrid from zarr.chunk_key_encodings import ChunkKeyEncoding, parse_separator @@ -299,7 +299,7 @@ def _json_convert(o): return o.descr raise TypeError - return as_buffer(json.dumps(self.to_dict(), default=_json_convert).encode()) + return Buffer.from_bytes(json.dumps(self.to_dict(), default=_json_convert).encode()) @classmethod def from_dict(cls, data: Dict[str, Any]) -> ArrayV2Metadata: diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index bfd8dd0c3b..37a9a5b8f5 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -7,7 +7,7 @@ from typing import Union, Optional, List, Tuple from zarr.abc.store import Store -from zarr.buffer import Buffer, as_buffer +from zarr.buffer import Buffer from zarr.common import concurrent_map, to_thread @@ -32,7 +32,7 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> end = (start + byte_range[1]) if byte_range[1] is not None else None else: - return as_buffer(path.read_bytes()) + return Buffer.from_bytes(path.read_bytes()) with path.open("rb") as f: size = f.seek(0, io.SEEK_END) if start is not None: @@ -43,8 +43,8 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> if end is not None: if end < 0: end = size + end - return as_buffer(f.read(end - f.tell())) - return as_buffer(f.read()) + return Buffer.from_bytes(f.read(end - f.tell())) + return Buffer.from_bytes(f.read()) def _put( @@ -124,7 +124,7 @@ async def set(self, key: str, value: Buffer) -> None: assert isinstance(key, str) if isinstance(value, (bytes, bytearray)): # TODO: to support the v2 tests, we convert bytes to Buffer here - value = as_buffer(value) + value = Buffer.from_bytes(value) if not isinstance(value, Buffer): raise TypeError("LocalStore.set(): `value` must a Buffer instance") path = self.root / key diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index 2b86578b32..1caba5acc1 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -5,7 +5,7 @@ from zarr.common import concurrent_map from zarr.abc.store import Store -from zarr.buffer import Buffer, as_buffer +from zarr.buffer import Buffer # TODO: this store could easily be extended to wrap 
any MutableMapping store from v2 @@ -53,7 +53,7 @@ async def set( assert isinstance(key, str) if isinstance(value, (bytes, bytearray)): # TODO: to support the v2 tests, we convert bytes to Buffer here - value = as_buffer(value) + value = Buffer.from_bytes(value) if not isinstance(value, Buffer): raise TypeError(f"Expected Buffer. Got {type(value)}.") diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 48f2ce46b0..99f8021594 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -1,7 +1,7 @@ import pytest from zarr.abc.store import Store -from zarr.buffer import as_buffer +from zarr.buffer import Buffer class StoreTests: @@ -26,14 +26,14 @@ def test_store_capabilities(self, store: Store) -> None: @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) async def test_set_get_bytes_roundtrip(self, store: Store, key: str, data: bytes) -> None: - await store.set(key, as_buffer(data)) + await store.set(key, Buffer.from_bytes(data)) assert await store.get(key) == data @pytest.mark.parametrize("key", ["foo/c/0"]) @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) async def test_get_partial_values(self, store: Store, key: str, data: bytes) -> None: # put all of the data - await store.set(key, as_buffer(data)) + await store.set(key, Buffer.from_bytes(data)) # read back just part of it vals = await store.get_partial_values([(key, (0, 2))]) assert vals == [data[0:2]] @@ -44,18 +44,18 @@ async def test_get_partial_values(self, store: Store, key: str, data: bytes) -> async def test_exists(self, store: Store) -> None: assert not await store.exists("foo") - await store.set("foo/zarr.json", as_buffer(b"bar")) + await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") async def test_delete(self, store: Store) -> None: - await store.set("foo/zarr.json", as_buffer(b"bar")) + await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") await store.delete("foo/zarr.json") assert not await store.exists("foo/zarr.json") async def test_list(self, store: Store) -> None: assert [k async for k in store.list()] == [] - await store.set("foo/zarr.json", as_buffer(b"bar")) + await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) keys = [k async for k in store.list()] assert keys == ["foo/zarr.json"], keys @@ -63,7 +63,9 @@ async def test_list(self, store: Store) -> None: for i in range(10): key = f"foo/c/{i}" expected.append(key) - await store.set(f"foo/c/{i}", as_buffer(i.to_bytes(length=3, byteorder="little"))) + await store.set( + f"foo/c/{i}", Buffer.from_bytes(i.to_bytes(length=3, byteorder="little")) + ) async def test_list_prefix(self, store: Store) -> None: # TODO: we currently don't use list_prefix anywhere @@ -72,8 +74,8 @@ async def test_list_prefix(self, store: Store) -> None: async def test_list_dir(self, store: Store) -> None: assert [k async for k in store.list_dir("")] == [] assert [k async for k in store.list_dir("foo")] == [] - await store.set("foo/zarr.json", as_buffer(b"bar")) - await store.set("foo/c/1", as_buffer(b"\x01")) + await store.set("foo/zarr.json", Buffer.from_bytes(b"bar")) + await store.set("foo/c/1", Buffer.from_bytes(b"\x01")) keys = [k async for k in store.list_dir("foo")] assert set(keys) == set(["zarr.json", "c"]), keys diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index fd1f4e5b27..c94ec87e2f 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ 
-1,7 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING -from zarr.buffer import as_buffer +from zarr.buffer import Buffer from zarr.sync import sync if TYPE_CHECKING: @@ -41,7 +41,7 @@ def test_group_members(store_type, request): # add an extra object to the domain of the group. # the list of children should ignore this object. - sync(store.set(f"{path}/extra_object-1", as_buffer(b"000000"))) + sync(store.set(f"{path}/extra_object-1", Buffer.from_bytes(b"000000"))) # add an extra object under a directory-like prefix in the domain of the group. # this creates a directory with a random key in it # this should not show up as a member From 962d729f1dfa361ff04e64eacc7a695525897b9f Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 14:34:12 +0200 Subject: [PATCH 16/45] remove Buffer.as_numpy_array() --- src/zarr/buffer.py | 24 +++++++++++++----------- src/zarr/codecs/bytes.py | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index b8d7ba93b5..17413b16f0 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -17,7 +17,9 @@ class Buffer: def __init__(self, array: np.ndarray): assert isinstance(array, np.ndarray) - + assert array.ndim == 1 + assert array.itemsize == 1 + assert array.dtype == np.dtype("b") self._data = array @classmethod @@ -32,20 +34,20 @@ def from_bytes(cls, data: BytesLike) -> Self: def from_nd_buffer(cls, nd_buffer: NDBuffer) -> Self: return cls(np.frombuffer(nd_buffer.as_numpy_array().reshape(-1), dtype="b")) + def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: + return NDBuffer(self._data.view(dtype=dtype)) + def to_bytes(self) -> bytes: return bytes(self.memoryview()) def memoryview(self) -> memoryview: - return memoryview(self._data.reshape(-1).view(dtype="b")) - - def as_numpy_array(self, dtype: Optional[np.DTypeLike] = "b") -> np.ndarray: - return self._data.reshape(-1).view(dtype=dtype) + return memoryview(self._data) def __getitem__(self, key: Any) -> Self: - return self.__class__(self.as_numpy_array().__getitem__(key)) + return self.__class__(self._data.__getitem__(key)) def __setitem__(self, key: Any, value: Any) -> None: - self.as_numpy_array().__setitem__(key, value) + self._data.__setitem__(key, value) def __len__(self) -> int: return self._data.nbytes @@ -74,10 +76,6 @@ def __init__(self, array: np.ndarray): assert array.dtype != object self._data = array - @classmethod - def from_numpy_array(cls, array: np.ArrayLike) -> Self: - return cls(np.asanyarray(array)) - @classmethod def create_empty( cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C" @@ -90,6 +88,10 @@ def create_zeros( ) -> Self: return cls(np.zeros(shape=shape, dtype=dtype, order=order)) + @classmethod + def from_numpy_array(cls, array: np.ArrayLike) -> Self: + return cls(np.asanyarray(array)) + def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: if dtype is None: return self._data diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 3a18442bab..4d3ee5469a 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -74,7 +74,7 @@ async def decode( dtype = np.dtype(f"{prefix}{chunk_spec.dtype.str[1:]}") else: dtype = np.dtype(f"|{chunk_spec.dtype.str[1:]}") - chunk_array = NDBuffer(chunk_bytes.as_numpy_array(dtype)) + chunk_array = chunk_bytes.as_nd_buffer(dtype=dtype) # ensure correct chunk shape if chunk_array.shape != chunk_spec.shape: From 12de6c2a99ca16f4b6b67000f8986f214f60d4fa Mon Sep 
17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 14:42:25 +0200 Subject: [PATCH 17/45] impl. NDBuffer.as_buffer() --- src/zarr/buffer.py | 7 +++---- src/zarr/codecs/bytes.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 17413b16f0..ccd0be3525 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -30,10 +30,6 @@ def create_empty(cls, *, nbytes: int) -> Self: def from_bytes(cls, data: BytesLike) -> Self: return cls(np.frombuffer(data, dtype="b")) - @classmethod - def from_nd_buffer(cls, nd_buffer: NDBuffer) -> Self: - return cls(np.frombuffer(nd_buffer.as_numpy_array().reshape(-1), dtype="b")) - def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: return NDBuffer(self._data.view(dtype=dtype)) @@ -92,6 +88,9 @@ def create_zeros( def from_numpy_array(cls, array: np.ArrayLike) -> Self: return cls(np.asanyarray(array)) + def as_buffer(self) -> Buffer: + return Buffer(np.frombuffer(self.as_numpy_array().reshape(-1), dtype="b")) + def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: if dtype is None: return self._data diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 4d3ee5469a..d6a626e160 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -93,7 +93,7 @@ async def encode( if self.endian is not None and self.endian != chunk_array.byteorder: new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) chunk_array = chunk_array.astype(new_dtype) - return Buffer.from_nd_buffer(chunk_array) + return chunk_array.as_buffer() def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length From 36a0d98be9588ea5b1de636876ecfed7fdc0e046 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Mon, 13 May 2024 16:12:14 +0200 Subject: [PATCH 18/45] reduce the use of as_numpy_array() --- src/zarr/buffer.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index ccd0be3525..a592ae69e6 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -89,7 +89,7 @@ def from_numpy_array(cls, array: np.ArrayLike) -> Self: return cls(np.asanyarray(array)) def as_buffer(self) -> Buffer: - return Buffer(np.frombuffer(self.as_numpy_array().reshape(-1), dtype="b")) + return Buffer(self._data.reshape(-1).view(dtype="b")) def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: if dtype is None: @@ -98,11 +98,11 @@ def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: @property def dtype(self) -> np.dtype[Any]: - return self.as_numpy_array().dtype + return self._data.dtype @property def shape(self) -> Tuple[int, ...]: - return self.as_numpy_array().shape + return self._data.shape @property def byteorder(self) -> Endian: @@ -116,30 +116,30 @@ def byteorder(self) -> Endian: return Endian(sys.byteorder) def reshape(self, newshape: Iterable[int]) -> Self: - return self.__class__(self.as_numpy_array().reshape(newshape)) + return self.__class__(self._data.reshape(newshape)) def astype(self, dtype: np.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: - return self.__class__(self.as_numpy_array().astype(dtype=dtype, order=order)) + return self.__class__(self._data.astype(dtype=dtype, order=order)) def __getitem__(self, key: Any) -> Self: - return self.__class__(np.asanyarray(self.as_numpy_array().__getitem__(key))) + return self.__class__(np.asanyarray(self._data.__getitem__(key))) def __setitem__(self, key: Any, value: Any) -> None: if isinstance(value, NDBuffer): - value = value.as_numpy_array() - self.as_numpy_array().__setitem__(key, value) + value = value._data + self._data.__setitem__(key, value) def __len__(self) -> int: - return self.as_numpy_array().__len__() + return self._data.__len__() def fill(self, value: Any) -> None: - self.as_numpy_array().fill(value) + self._data.fill(value) def copy(self) -> Self: - return self.__class__(self.as_numpy_array().copy()) + return self.__class__(self._data.copy()) def transpose(self, *axes: np.SupportsIndex) -> Self: - return self.__class__(self.as_numpy_array().transpose(*axes)) + return self.__class__(self._data.transpose(*axes)) def as_bytes_wrapper(func: Callable[[bytes], bytes], buf: Buffer) -> Buffer: From 43ebafeda7b22439ed3b26434954ecf7ad27ee8a Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 16:25:41 +0200 Subject: [PATCH 19/45] impl. 
and use NDBuffer.all_equal --- src/zarr/array.py | 2 +- src/zarr/buffer.py | 3 +++ src/zarr/codecs/sharding.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 8f5be1b223..fab81d0dac 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -343,7 +343,7 @@ async def _write_chunk( async def _write_chunk_to_store( self, store_path: StorePath, chunk_array: NDBuffer, chunk_spec: ArraySpec ) -> None: - if np.all(chunk_array.as_numpy_array() == self.metadata.fill_value): + if chunk_array.all_equal(self.metadata.fill_value): # chunks that only contain fill_value will be removed await store_path.delete() else: diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index a592ae69e6..d758e979af 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -132,6 +132,9 @@ def __setitem__(self, key: Any, value: Any) -> None: def __len__(self) -> int: return self._data.__len__() + def all_equal(self, other: Any) -> bool: + return bool((self._data == other).all()) + def fill(self, value: Any) -> None: self._data.fill(value) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 8aaab78703..6c74f944bb 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -459,7 +459,7 @@ async def _write_chunk( ) chunk_array.fill(shard_spec.fill_value) chunk_array[chunk_selection] = shard_array[out_selection] - if not np.array_equiv(chunk_array.as_numpy_array(), shard_spec.fill_value): + if not chunk_array.all_equal(shard_spec.fill_value): chunk_spec = self._get_chunk_spec(shard_spec) return ( chunk_coords, @@ -538,7 +538,7 @@ async def _write_chunk( ).copy() # make a writable copy chunk_array[chunk_selection] = shard_array[out_selection] - if not np.array_equiv(chunk_array.as_numpy_array(), shard_spec.fill_value): + if not chunk_array.all_equal(shard_spec.fill_value): return ( chunk_coords, await self.codecs.encode(chunk_array, chunk_spec), From d01557e45f15c2b7ffff1ccd356aed2abcf5bf29 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 16:29:42 +0200 Subject: [PATCH 20/45] as_numpy_array(): doc --- src/zarr/buffer.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index d758e979af..310fe17763 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -91,10 +91,18 @@ def from_numpy_array(cls, array: np.ArrayLike) -> Self: def as_buffer(self) -> Buffer: return Buffer(self._data.reshape(-1).view(dtype="b")) - def as_numpy_array(self, dtype: Optional[np.DTypeLike] = None) -> np.ndarray: - if dtype is None: - return self._data - return self._data.astype(dtype=dtype, copy=False) + def as_numpy_array(self) -> np.ndarray: + """Return the buffer as a NumPy array. + + Warning + ------- + Might have to copy data, only use this method for small buffers such as metadata + + Return + ------ + NumPy array of this buffer (might be a data copy) + """ + return self._data @property def dtype(self) -> np.dtype[Any]: From c74f2661226da860c96392268b09aecae2497c1b Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Mon, 13 May 2024 16:40:14 +0200 Subject: [PATCH 21/45] remove as_bytearray() --- src/zarr/array_v2.py | 9 ++++++++- src/zarr/buffer.py | 8 +------- tests/v3/test_codecs.py | 15 ++++++--------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/zarr/array_v2.py b/src/zarr/array_v2.py index 2254b0e7c3..cc4ca7dd8a 100644 --- a/src/zarr/array_v2.py +++ b/src/zarr/array_v2.py @@ -10,7 +10,7 @@ from numcodecs.compat import ensure_bytes, ensure_ndarray -from zarr.buffer import Buffer, NDBuffer, as_bytearray +from zarr.buffer import Buffer, NDBuffer from zarr.common import ( ZARRAY_JSON, ZATTRS_JSON, @@ -30,6 +30,13 @@ from zarr.array import Array +def as_bytearray(data: Optional[Buffer]) -> Optional[bytes]: + """Help function to convert a Buffer into bytes if not None""" + if data is None: + return data + return data.to_bytes() + + @dataclass(frozen=True) class _AsyncArrayProxy: array: ArrayV2 diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 310fe17763..5b50b54084 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,7 +1,7 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Tuple +from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Tuple import numpy as np from zarr.common import BytesLike @@ -155,9 +155,3 @@ def transpose(self, *axes: np.SupportsIndex) -> Self: def as_bytes_wrapper(func: Callable[[bytes], bytes], buf: Buffer) -> Buffer: return Buffer.from_bytes(func(buf.to_bytes())) - - -def as_bytearray(data: Optional[Buffer]) -> Optional[bytes]: - if data is None: - return data - return data.to_bytes() diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index a653c9ce92..85b21534fb 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -7,7 +7,6 @@ import numpy as np import pytest -from zarr.buffer import as_bytearray import zarr.v2 from zarr.abc.codec import Codec from zarr.array import Array, AsyncArray @@ -738,9 +737,9 @@ async def test_dimension_names(store: Store): ) assert (await AsyncArray.open(store / "dimension_names2")).metadata.dimension_names is None - zarr_json_bytes = as_bytearray(await (store / "dimension_names2" / "zarr.json").get()) - assert zarr_json_bytes is not None - assert "dimension_names" not in json.loads(zarr_json_bytes) + zarr_json_buffer = await (store / "dimension_names2" / "zarr.json").get() + assert zarr_json_buffer is not None + assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) def test_gzip(store: Store): @@ -966,7 +965,7 @@ async def test_blosc_evolve(store: Store): codecs=[BytesCodec(), BloscCodec()], ) - zarr_json = json.loads(as_bytearray(await (store / "blosc_evolve_u1" / "zarr.json").get())) + zarr_json = json.loads((await (store / "blosc_evolve_u1" / "zarr.json").get()).to_bytes()) blosc_configuration_json = zarr_json["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == 1 assert blosc_configuration_json["shuffle"] == "bitshuffle" @@ -980,7 +979,7 @@ async def test_blosc_evolve(store: Store): codecs=[BytesCodec(), BloscCodec()], ) - zarr_json = json.loads(as_bytearray(await (store / "blosc_evolve_u2" / "zarr.json").get())) + zarr_json = json.loads((await (store / "blosc_evolve_u2" / "zarr.json").get()).to_bytes()) blosc_configuration_json = zarr_json["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == 2 assert blosc_configuration_json["shuffle"] == "shuffle" @@ -994,9 +993,7 @@ async def test_blosc_evolve(store: 
Store): codecs=[ShardingCodec(chunk_shape=(16, 16), codecs=[BytesCodec(), BloscCodec()])], ) - zarr_json = json.loads( - as_bytearray(await (store / "sharding_blosc_evolve" / "zarr.json").get()) - ) + zarr_json = json.loads((await (store / "sharding_blosc_evolve" / "zarr.json").get()).to_bytes()) blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == 2 assert blosc_configuration_json["shuffle"] == "shuffle" From 6fce5a9e44f80cab14ee0a7c3363f6c67a61f7c9 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 17:25:11 +0200 Subject: [PATCH 22/45] impl. Buffer.from_numpy_array() --- src/zarr/buffer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 5b50b54084..5f8986f362 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -26,9 +26,13 @@ def __init__(self, array: np.ndarray): def create_empty(cls, *, nbytes: int) -> Self: return cls(np.empty(shape=(nbytes,), dtype="b")) + @classmethod + def from_numpy_array(cls, array: np.ArrayLike) -> Self: + return cls(np.asanyarray(array).reshape(-1).view(dtype="b")) + @classmethod def from_bytes(cls, data: BytesLike) -> Self: - return cls(np.frombuffer(data, dtype="b")) + return cls.from_numpy_array(np.frombuffer(data, dtype="b")) def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: return NDBuffer(self._data.view(dtype=dtype)) From c37312ba11bcf36fde030b6d01b2d31385549b08 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 19:32:11 +0200 Subject: [PATCH 23/45] NDArrayLike --- src/zarr/buffer.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 5f8986f362..c27ce12ffb 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,7 +1,7 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Tuple +from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Tuple, TypeAlias import numpy as np from zarr.common import BytesLike @@ -11,12 +11,13 @@ from typing_extensions import Self from zarr.codecs.bytes import Endian +NDArrayLike: TypeAlias = np.ndarray + class Buffer: """A flat contiguous version of `NDBuffer` with an item size of 1""" - def __init__(self, array: np.ndarray): - assert isinstance(array, np.ndarray) + def __init__(self, array: NDArrayLike): assert array.ndim == 1 assert array.itemsize == 1 assert array.dtype == np.dtype("b") @@ -71,8 +72,8 @@ class NDBuffer: of memory such as CUDA device memory. """ - def __init__(self, array: np.ndarray): - assert isinstance(array, np.ndarray) + def __init__(self, array: NDArrayLike): + assert array.ndim > 0 assert array.dtype != object self._data = array From 925fa59d1915df90daad13028121f045a15e2a7b Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Mon, 13 May 2024 19:50:38 +0200 Subject: [PATCH 24/45] Factory.Create --- src/zarr/array.py | 9 ++++++--- src/zarr/array_v2.py | 9 ++++++--- src/zarr/buffer.py | 30 ++++++++++++++++++------------ src/zarr/codecs/sharding.py | 16 ++++++++++------ 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index fab81d0dac..6edc636b52 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -193,7 +193,8 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]: ) # setup output array - out = NDBuffer.create_zeros( + out = NDBuffer.create( + factory=np.zeros, shape=indexer.shape, dtype=self.metadata.dtype, order=self.order, @@ -303,7 +304,8 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( + factory=np.empty, shape=chunk_shape, dtype=self.metadata.dtype, ) @@ -327,7 +329,8 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( + factory=np.empty, shape=chunk_shape, dtype=self.metadata.dtype, ) diff --git a/src/zarr/array_v2.py b/src/zarr/array_v2.py index cc4ca7dd8a..ad58d1ceef 100644 --- a/src/zarr/array_v2.py +++ b/src/zarr/array_v2.py @@ -224,7 +224,8 @@ async def get_async(self, selection: Selection): ) # setup output array - out = NDBuffer.create_zeros( + out = NDBuffer.create( + factory=np.zeros, shape=indexer.shape, dtype=self.metadata.dtype, order=self.metadata.order, @@ -341,7 +342,8 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( + factory=np.empty, shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, @@ -358,7 +360,8 @@ async def _write_chunk( # merge new value if tmp is None: - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( + factory=np.empty, shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index c27ce12ffb..f96b6dff5b 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,7 +1,7 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Tuple, TypeAlias +from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Protocol, Tuple, TypeAlias import numpy as np from zarr.common import BytesLike @@ -14,6 +14,13 @@ NDArrayLike: TypeAlias = np.ndarray +class Factory: + class Create(Protocol): + def __call__( + self, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] + ) -> NDArrayLike: ... 
+ + class Buffer: """A flat contiguous version of `NDBuffer` with an item size of 1""" @@ -24,8 +31,8 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod - def create_empty(cls, *, nbytes: int) -> Self: - return cls(np.empty(shape=(nbytes,), dtype="b")) + def create(cls, *, factory: Factory.Create, nbytes: int) -> Self: + return cls(factory(shape=(nbytes,), dtype="b", order="C")) @classmethod def from_numpy_array(cls, array: np.ArrayLike) -> Self: @@ -78,16 +85,15 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod - def create_empty( - cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C" - ) -> Self: - return cls(np.empty(shape=shape, dtype=dtype, order=order)) - - @classmethod - def create_zeros( - cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C" + def create( + cls, + *, + factory: Factory.Create, + shape: Iterable[int], + dtype: np.DTypeLike, + order: Literal["C", "F"] = "C", ) -> Self: - return cls(np.zeros(shape=shape, dtype=dtype, order=order)) + return cls(factory(shape=shape, dtype=dtype, order=order)) @classmethod def from_numpy_array(cls, array: np.ArrayLike) -> Self: diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 6c74f944bb..4c5c294337 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -148,7 +148,7 @@ async def from_bytes( def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardProxy: index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() - obj.buf = Buffer.create_empty(nbytes=0) + obj.buf = Buffer.create(factory=np.empty, nbytes=0) obj.index = index return obj @@ -190,7 +190,7 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = Buffer.create_empty(nbytes=0) + obj.buf = Buffer.create(factory=np.empty, nbytes=0) obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj @@ -312,7 +312,8 @@ async def decode( ) # setup output array - out = NDBuffer.create_zeros( + out = NDBuffer.create( + factory=np.zeros, shape=shard_shape, dtype=shard_spec.dtype, order=shard_spec.order, @@ -359,7 +360,8 @@ async def decode_partial( ) # setup output array - out = NDBuffer.create_zeros( + out = NDBuffer.create( + factory=np.zeros, shape=indexer.shape, dtype=shard_spec.dtype, order=shard_spec.order, @@ -453,7 +455,8 @@ async def _write_chunk( chunk_array = shard_array[out_selection] else: # handling writing partial chunks - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( + factory=np.empty, shape=chunk_shape, dtype=shard_spec.dtype, ) @@ -527,7 +530,8 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( + factory=np.empty, shape=self.chunk_shape, dtype=shard_spec.dtype, ) From 1bbeefc0b244b73f1c4cc4c9ffac3a088c4a4149 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 19:57:25 +0200 Subject: [PATCH 25/45] Factory.FromNumpy --- src/zarr/array.py | 2 +- src/zarr/buffer.py | 13 ++++++++----- src/zarr/codecs/sharding.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 6edc636b52..51d40723f6 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -270,7 +270,7 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None: # We accept a numpy array as input from the user and convert it to a NDBuffer. 
# From this point onwards, we only pass Buffer and NDBuffer between components. - value = NDBuffer.from_numpy_array(value) + value = NDBuffer.from_numpy_array(factory=np.asanyarray, array_like=value) # merging with existing data and encoding chunks await concurrent_map( diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index f96b6dff5b..6aea159918 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -20,6 +20,9 @@ def __call__( self, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] ) -> NDArrayLike: ... + class FromNumpy(Protocol): + def __call__(self, array_like: np.ArrayLike) -> NDArrayLike: ... + class Buffer: """A flat contiguous version of `NDBuffer` with an item size of 1""" @@ -35,12 +38,12 @@ def create(cls, *, factory: Factory.Create, nbytes: int) -> Self: return cls(factory(shape=(nbytes,), dtype="b", order="C")) @classmethod - def from_numpy_array(cls, array: np.ArrayLike) -> Self: - return cls(np.asanyarray(array).reshape(-1).view(dtype="b")) + def from_numpy_array(cls, *, factory: Factory.FromNumpy, array_like: np.ArrayLike) -> Self: + return cls(factory(array_like).reshape(-1).view(dtype="b")) @classmethod def from_bytes(cls, data: BytesLike) -> Self: - return cls.from_numpy_array(np.frombuffer(data, dtype="b")) + return cls.from_numpy_array(factory=np.asarray, array_like=np.frombuffer(data, dtype="b")) def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: return NDBuffer(self._data.view(dtype=dtype)) @@ -96,8 +99,8 @@ def create( return cls(factory(shape=shape, dtype=dtype, order=order)) @classmethod - def from_numpy_array(cls, array: np.ArrayLike) -> Self: - return cls(np.asanyarray(array)) + def from_numpy_array(cls, *, factory: Factory.FromNumpy, array_like: np.ArrayLike) -> Self: + return cls(factory(array_like)) def as_buffer(self) -> Buffer: return Buffer(self._data.reshape(-1).view(dtype="b")) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 4c5c294337..1239bbe6ce 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -607,7 +607,7 @@ async def _decode_shard_index( async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: index_bytes = await self.index_codecs.encode( - NDBuffer.from_numpy_array(index.offsets_and_lengths), + NDBuffer.from_numpy_array(factory=np.asarray, array_like=index.offsets_and_lengths), self._get_index_chunk_spec(index.chunks_per_shard), ) assert index_bytes is not None From 11595675d4bceee38b5fca8cecd95916f066202d Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 20:01:41 +0200 Subject: [PATCH 26/45] doc --- src/zarr/buffer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 6aea159918..2325478fce 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -11,6 +11,7 @@ from typing_extensions import Self from zarr.codecs.bytes import Endian +# TODO: create a protocol for the attributes we need NDArrayLike: TypeAlias = np.ndarray @@ -25,7 +26,12 @@ def __call__(self, array_like: np.ArrayLike) -> NDArrayLike: ... class Buffer: - """A flat contiguous version of `NDBuffer` with an item size of 1""" + """A flat contiguous memory block + + We use `Buffer` throughout Zarr to represent a contiguous block of memory. + For now, we only support host memory but the plan is to support other types + of memory such as CUDA device memory. 
+ """ def __init__(self, array: NDArrayLike): assert array.ndim == 1 From 26d67083f756140a0dff314fa7804f027d9f75f3 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 22:17:15 +0200 Subject: [PATCH 27/45] doc --- src/zarr/buffer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 2325478fce..11e1673464 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -11,7 +11,7 @@ from typing_extensions import Self from zarr.codecs.bytes import Endian -# TODO: create a protocol for the attributes we need +# TODO: create a protocol for the attributes we need, for now we just aliasing numpy NDArrayLike: TypeAlias = np.ndarray From 5ce21a0533c25b1d02bbc6f9cc343c5575a23eb6 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 13 May 2024 22:42:15 +0200 Subject: [PATCH 28/45] remove the buffer factories again --- src/zarr/array.py | 11 ++++------ src/zarr/array_v2.py | 9 +++----- src/zarr/buffer.py | 43 +++++++++++++++++++------------------ src/zarr/codecs/sharding.py | 18 ++++++---------- 4 files changed, 36 insertions(+), 45 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 51d40723f6..fab81d0dac 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -193,8 +193,7 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]: ) # setup output array - out = NDBuffer.create( - factory=np.zeros, + out = NDBuffer.create_zeros( shape=indexer.shape, dtype=self.metadata.dtype, order=self.order, @@ -270,7 +269,7 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None: # We accept a numpy array as input from the user and convert it to a NDBuffer. # From this point onwards, we only pass Buffer and NDBuffer between components. 
- value = NDBuffer.from_numpy_array(factory=np.asanyarray, array_like=value) + value = NDBuffer.from_numpy_array(value) # merging with existing data and encoding chunks await concurrent_map( @@ -304,8 +303,7 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = NDBuffer.create( - factory=np.empty, + chunk_array = NDBuffer.create_empty( shape=chunk_shape, dtype=self.metadata.dtype, ) @@ -329,8 +327,7 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = NDBuffer.create( - factory=np.empty, + chunk_array = NDBuffer.create_empty( shape=chunk_shape, dtype=self.metadata.dtype, ) diff --git a/src/zarr/array_v2.py b/src/zarr/array_v2.py index ad58d1ceef..cc4ca7dd8a 100644 --- a/src/zarr/array_v2.py +++ b/src/zarr/array_v2.py @@ -224,8 +224,7 @@ async def get_async(self, selection: Selection): ) # setup output array - out = NDBuffer.create( - factory=np.zeros, + out = NDBuffer.create_zeros( shape=indexer.shape, dtype=self.metadata.dtype, order=self.metadata.order, @@ -342,8 +341,7 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = NDBuffer.create( - factory=np.empty, + chunk_array = NDBuffer.create_empty( shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, @@ -360,8 +358,7 @@ async def _write_chunk( # merge new value if tmp is None: - chunk_array = NDBuffer.create( - factory=np.empty, + chunk_array = NDBuffer.create_empty( shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 11e1673464..ec0f4c52d6 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,7 +1,7 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Protocol, Tuple, TypeAlias +from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Tuple, TypeAlias import numpy as np from zarr.common import BytesLike @@ -15,16 +15,6 @@ NDArrayLike: TypeAlias = np.ndarray -class Factory: - class Create(Protocol): - def __call__( - self, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] - ) -> NDArrayLike: ... - - class FromNumpy(Protocol): - def __call__(self, array_like: np.ArrayLike) -> NDArrayLike: ... 
- - class Buffer: """A flat contiguous memory block @@ -40,16 +30,16 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod - def create(cls, *, factory: Factory.Create, nbytes: int) -> Self: - return cls(factory(shape=(nbytes,), dtype="b", order="C")) + def create_empty(cls, *, nbytes: int) -> Self: + return cls(np.empty(shape=(nbytes,), dtype="b", order="C")) @classmethod - def from_numpy_array(cls, *, factory: Factory.FromNumpy, array_like: np.ArrayLike) -> Self: - return cls(factory(array_like).reshape(-1).view(dtype="b")) + def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: + return cls(np.asarray(array_like).reshape(-1).view(dtype="b")) @classmethod def from_bytes(cls, data: BytesLike) -> Self: - return cls.from_numpy_array(factory=np.asarray, array_like=np.frombuffer(data, dtype="b")) + return cls.from_numpy_array(np.frombuffer(data, dtype="b")) def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: return NDBuffer(self._data.view(dtype=dtype)) @@ -94,19 +84,30 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod - def create( + def create_empty( + cls, + *, + shape: Iterable[int], + dtype: np.DTypeLike, + order: Literal["C", "F"] = "C", + ) -> Self: + return cls(np.empty(shape=shape, dtype=dtype, order=order)) + + @classmethod + def create_zeros( cls, *, - factory: Factory.Create, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C", ) -> Self: - return cls(factory(shape=shape, dtype=dtype, order=order)) + ret = cls.create_empty(shape=shape, dtype=dtype, order=order) + ret[...] = 0 + return ret @classmethod - def from_numpy_array(cls, *, factory: Factory.FromNumpy, array_like: np.ArrayLike) -> Self: - return cls(factory(array_like)) + def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: + return cls(np.asanyarray(array_like)) def as_buffer(self) -> Buffer: return Buffer(self._data.reshape(-1).view(dtype="b")) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 1239bbe6ce..6c74f944bb 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -148,7 +148,7 @@ async def from_bytes( def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardProxy: index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() - obj.buf = Buffer.create(factory=np.empty, nbytes=0) + obj.buf = Buffer.create_empty(nbytes=0) obj.index = index return obj @@ -190,7 +190,7 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = Buffer.create(factory=np.empty, nbytes=0) + obj.buf = Buffer.create_empty(nbytes=0) obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj @@ -312,8 +312,7 @@ async def decode( ) # setup output array - out = NDBuffer.create( - factory=np.zeros, + out = NDBuffer.create_zeros( shape=shard_shape, dtype=shard_spec.dtype, order=shard_spec.order, @@ -360,8 +359,7 @@ async def decode_partial( ) # setup output array - out = NDBuffer.create( - factory=np.zeros, + out = NDBuffer.create_zeros( shape=indexer.shape, dtype=shard_spec.dtype, order=shard_spec.order, @@ -455,8 +453,7 @@ async def _write_chunk( chunk_array = shard_array[out_selection] else: # handling writing partial chunks - chunk_array = NDBuffer.create( - factory=np.empty, + chunk_array = NDBuffer.create_empty( shape=chunk_shape, dtype=shard_spec.dtype, ) @@ -530,8 +527,7 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = NDBuffer.create( - factory=np.empty, + chunk_array = 
NDBuffer.create_empty( shape=self.chunk_shape, dtype=shard_spec.dtype, ) @@ -607,7 +603,7 @@ async def _decode_shard_index( async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: index_bytes = await self.index_codecs.encode( - NDBuffer.from_numpy_array(factory=np.asarray, array_like=index.offsets_and_lengths), + NDBuffer.from_numpy_array(index.offsets_and_lengths), self._get_index_chunk_spec(index.chunks_per_shard), ) assert index_bytes is not None From be9dce38944e5a4ebf19f182afdfccda20472de5 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 09:37:21 +0200 Subject: [PATCH 29/45] NDBuffer.create(): take fill_value --- src/zarr/array.py | 16 ++++++---------- src/zarr/array_v2.py | 14 ++++++-------- src/zarr/buffer.py | 20 ++++++-------------- src/zarr/codecs/sharding.py | 16 ++++++---------- 4 files changed, 24 insertions(+), 42 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index fab81d0dac..9137f74644 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -193,10 +193,8 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]: ) # setup output array - out = NDBuffer.create_zeros( - shape=indexer.shape, - dtype=self.metadata.dtype, - order=self.order, + out = NDBuffer.create( + shape=indexer.shape, dtype=self.metadata.dtype, order=self.order, fill_value=0 ) # reading chunks and decoding them @@ -303,11 +301,9 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = NDBuffer.create_empty( - shape=chunk_shape, - dtype=self.metadata.dtype, + chunk_array = NDBuffer.create( + shape=chunk_shape, dtype=self.metadata.dtype, fill_value=value ) - chunk_array.fill(value) else: chunk_array = value[out_selection] await self._write_chunk_to_store(store_path, chunk_array, chunk_spec) @@ -327,11 +323,11 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( shape=chunk_shape, dtype=self.metadata.dtype, + fill_value=self.metadata.fill_value, ) - chunk_array.fill(self.metadata.fill_value) else: chunk_array = ( await self.codecs.decode(chunk_bytes, chunk_spec) diff --git a/src/zarr/array_v2.py b/src/zarr/array_v2.py index cc4ca7dd8a..053d58eb1a 100644 --- a/src/zarr/array_v2.py +++ b/src/zarr/array_v2.py @@ -224,10 +224,8 @@ async def get_async(self, selection: Selection): ) # setup output array - out = NDBuffer.create_zeros( - shape=indexer.shape, - dtype=self.metadata.dtype, - order=self.metadata.order, + out = NDBuffer.create( + shape=indexer.shape, dtype=self.metadata.dtype, order=self.metadata.order, fill_value=0 ) # reading chunks and decoding them @@ -341,12 +339,12 @@ async def _write_chunk( if is_total_slice(chunk_selection, chunk_shape): # write entire chunks if np.isscalar(value): - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, + fill_value=value, ) - chunk_array.fill(value) else: chunk_array = value[out_selection] await self._write_chunk_to_store(store_path, chunk_array) @@ -358,12 +356,12 @@ async def _write_chunk( # merge new value if tmp is None: - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( shape=chunk_shape, dtype=self.metadata.dtype, order=self.metadata.order, + fill_value=self.metadata.fill_value, ) - chunk_array.fill(self.metadata.fill_value) else: chunk_array = tmp.copy( order=self.metadata.order, diff --git a/src/zarr/buffer.py 
b/src/zarr/buffer.py index ec0f4c52d6..3df119c292 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,7 +1,7 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Tuple, TypeAlias +from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Tuple, TypeAlias import numpy as np from zarr.common import BytesLike @@ -84,25 +84,17 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod - def create_empty( + def create( cls, *, shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C", + fill_value: Optional[Any] = None, ) -> Self: - return cls(np.empty(shape=shape, dtype=dtype, order=order)) - - @classmethod - def create_zeros( - cls, - *, - shape: Iterable[int], - dtype: np.DTypeLike, - order: Literal["C", "F"] = "C", - ) -> Self: - ret = cls.create_empty(shape=shape, dtype=dtype, order=order) - ret[...] = 0 + ret = cls(np.empty(shape=shape, dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) return ret @classmethod diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 6c74f944bb..41902ffc13 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -312,10 +312,8 @@ async def decode( ) # setup output array - out = NDBuffer.create_zeros( - shape=shard_shape, - dtype=shard_spec.dtype, - order=shard_spec.order, + out = NDBuffer.create( + shape=shard_shape, dtype=shard_spec.dtype, order=shard_spec.order, fill_value=0 ) shard_dict = await _ShardProxy.from_bytes(shard_bytes, self, chunks_per_shard) @@ -359,10 +357,8 @@ async def decode_partial( ) # setup output array - out = NDBuffer.create_zeros( - shape=indexer.shape, - dtype=shard_spec.dtype, - order=shard_spec.order, + out = NDBuffer.create( + shape=indexer.shape, dtype=shard_spec.dtype, order=shard_spec.order, fill_value=0 ) indexed_chunks = list(indexer) @@ -453,7 +449,7 @@ async def _write_chunk( chunk_array = shard_array[out_selection] else: # handling writing partial chunks - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( shape=chunk_shape, dtype=shard_spec.dtype, ) @@ -527,7 +523,7 @@ async def _write_chunk( # merge new value if chunk_bytes is None: - chunk_array = NDBuffer.create_empty( + chunk_array = NDBuffer.create( shape=self.chunk_shape, dtype=shard_spec.dtype, ) From 57e3dd615394bc3f34394759c6471db30e12ba1a Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Tue, 14 May 2024 11:23:50 +0200 Subject: [PATCH 30/45] getitem and setitem now use factory --- src/zarr/array.py | 29 ++++++++++++++-------------- src/zarr/buffer.py | 47 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 18 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 936834fd34..d718117264 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -20,7 +20,7 @@ # from zarr.array_v2 import ArrayV2 -from zarr.buffer import Buffer, NDBuffer +from zarr.buffer import Buffer, Factory, NDArrayLike, NDBuffer from zarr.codecs import BytesCodec from zarr.codecs.pipeline import CodecPipeline from zarr.common import ( @@ -185,7 +185,9 @@ def dtype(self) -> np.dtype[Any]: def attrs(self) -> dict[str, Any]: return self.metadata.attributes - async def getitem(self, selection: Selection) -> npt.NDArray[Any]: + async def getitem( + self, selection: Selection, *, factory: Factory.Create = NDBuffer.create + ) -> NDArrayLike: assert isinstance(self.metadata.chunk_grid, RegularChunkGrid) indexer = BasicIndexer( selection, @@ -194,7 +196,7 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]: ) # setup output array - out = NDBuffer.create( + out = factory( shape=indexer.shape, dtype=self.metadata.dtype, order=self.order, fill_value=0 ) @@ -207,12 +209,7 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]: self._read_chunk, config.get("async.concurrency"), ) - - # We always return a numpy array to the user - if out.shape: - return out.as_numpy_array() - else: - return out.as_numpy_array()[()] + return out.as_ndarray_like() async def _save_metadata(self) -> None: await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(self.metadata.to_bytes())) @@ -244,7 +241,12 @@ async def _read_chunk( else: out[out_selection] = self.metadata.fill_value - async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None: + async def setitem( + self, + selection: Selection, + value: NDArrayLike, + factory: Factory.NDArrayLike = NDBuffer.from_ndarray_like, + ) -> None: assert isinstance(self.metadata.chunk_grid, RegularChunkGrid) chunk_shape = self.metadata.chunk_grid.chunk_shape indexer = BasicIndexer( @@ -257,8 +259,7 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None: # check value shape if np.isscalar(value): - # setting a scalar value - pass + value = np.asanyarray(value) else: if not hasattr(value, "shape"): value = np.asarray(value, self.metadata.dtype) @@ -266,9 +267,9 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None: if value.dtype.name != self.metadata.dtype.name: value = value.astype(self.metadata.dtype, order="A") - # We accept a numpy array as input from the user and convert it to a NDBuffer. + # We accept any ndarray like object from the user and convert it to a NDBuffer. # From this point onwards, we only pass Buffer and NDBuffer between components. 
- value = NDBuffer.from_numpy_array(value) + value = factory(value) # merging with existing data and encoding chunks await concurrent_map( diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 3df119c292..6eefb4c9aa 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,20 +1,44 @@ from __future__ import annotations import sys -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Tuple, TypeAlias -import numpy as np - -from zarr.common import BytesLike +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Literal, + Optional, + Protocol, + Tuple, + TypeAlias, +) +import numpy as np if TYPE_CHECKING: from typing_extensions import Self from zarr.codecs.bytes import Endian + from zarr.common import BytesLike # TODO: create a protocol for the attributes we need, for now we just alias numpy NDArrayLike: TypeAlias = np.ndarray +class Factory: + class Create(Protocol): + def __call__( + self, + *, + shape: Iterable[int], + dtype: np.DTypeLike, + order: Literal["C", "F"], + fill_value: Optional[Any], + ) -> NDBuffer: ... + + class NDArrayLike(Protocol): + def __call__(self, ndarray_like: NDArrayLike) -> NDBuffer: ... + + class Buffer: """A flat contiguous memory block @@ -101,6 +125,21 @@ def create( def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: return cls(np.asanyarray(array_like)) + @classmethod + def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: + return cls(ndarray_like) + + def as_ndarray_like(self) -> NDArrayLike: + """Return the underlying array instance representing the memory of this buffer + + This will never copy data. + + Return + ------ + The underlying array such as a NumPy or CuPy array. + """ + return self._data + def as_buffer(self) -> Buffer: return Buffer(self._data.reshape(-1).view(dtype="b")) From 8bbe5c1d4d8ba55380ee71c7953149fca7194e9a Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 11:54:22 +0200 Subject: [PATCH 31/45] doc --- src/zarr/array.py | 5 +++-- src/zarr/buffer.py | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index d718117264..b2932a3af3 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -267,8 +267,9 @@ async def setitem( if value.dtype.name != self.metadata.dtype.name: value = value.astype(self.metadata.dtype, order="A") - # We accept any ndarray like object from the user and convert it to a NDBuffer. - # From this point onwards, we only pass Buffer and NDBuffer between components. + # We accept any ndarray like object from the user and convert it + # to a NDBuffer (or subclass). From this point onwards, we only pass + # Buffer and NDBuffer between components. value = factory(value) # merging with existing data and encoding chunks diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 6eefb4c9aa..095cf05f49 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -33,10 +33,45 @@ def __call__( dtype: np.DTypeLike, order: Literal["C", "F"], fill_value: Optional[Any], - ) -> NDBuffer: ... + ) -> NDBuffer: + """Factory function to create a new NDBuffer (or subclass) + + Callables implementing the `Factory.Create` protocol must create a new + instance of NDBuffer (or subclass) given the following parameters. 
+ + Parameters + ---------- + shape + The shape of the new buffer + dtype + The datatype of each element in the new buffer + order + Whether to store multi-dimensional data in row-major (C-style) or + column-major (Fortran-style) order in memory. + fill_value + If not None, fill the new buffer with a scalar value. + + Return + ------ + A new NDBuffer or subclass instance + """ class NDArrayLike(Protocol): - def __call__(self, ndarray_like: NDArrayLike) -> NDBuffer: ... + def __call__(self, ndarray_like: NDArrayLike) -> NDBuffer: + """Factory function to coerce an array into a NDBuffer (or subclass) + + Callables implementing the `Factory.NDArrayLike` protocol must return + an instance of NDBuffer (or subclass) given an ndarray-like object. + + Parameters + ---------- + ndarray_like + ndarray-like object + + Return + ------ + A NDBuffer or subclass instance that represents `ndarray_like` + """ class Buffer: From 1c64b797f672fbcd515da722b8bba9ffa4aff1be Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 14:07:26 +0200 Subject: [PATCH 32/45] test --- tests/v3/test_buffer.py | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/v3/test_buffer.py diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py new file mode 100644 index 0000000000..edb3e40a35 --- /dev/null +++ b/tests/v3/test_buffer.py @@ -0,0 +1,57 @@ +from typing import Any, Iterable, Literal, Optional, Self +import numpy as np +import numpy.typing as npt +import pytest + +from zarr.array import AsyncArray +from zarr.buffer import NDBuffer +from zarr.store.core import StorePath +from zarr.store.memory import MemoryStore + + +class MyNDArrayLike(np.ndarray): + """An example of a ndarray-like class""" + + pass + + +class MyNDBuffer(NDBuffer): + """Example of a custom NDBuffer that handles MyNDArrayLike""" + + @classmethod + def create( + cls, + *, + shape: Iterable[int], + dtype: npt.DTypeLike, + order: Literal["C", "F"] = "C", + fill_value: Optional[Any] = None, + ) -> Self: + """Override `NDBuffer.create` to create a MyNDArrayLike instance""" + ret = cls(MyNDArrayLike(shape=shape, dtype=dtype, order=order)) + if fill_value is not None: + ret.fill(fill_value) + return ret + + +@pytest.mark.asyncio +async def test_async_array_factory(): + store = StorePath(MemoryStore()) + expect = np.zeros((9, 9), dtype="uint16", order="F") + a = await AsyncArray.create( + store / "test_async_array", + shape=expect.shape, + chunk_shape=(5, 5), + dtype=expect.dtype, + fill_value=0, + ) + expect[1:4, 3:6] = np.ones((3, 3)) + + await a.setitem( + selection=(slice(1, 4), slice(3, 6)), + value=np.ones((3, 3)), + factory=MyNDBuffer.from_ndarray_like, + ) + got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), factory=MyNDBuffer.create) + assert isinstance(got, MyNDArrayLike) + assert np.array_equal(expect, got) From cd7eb44ac31c87ca70c91fbde4b5d5b7088954d3 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Tue, 14 May 2024 15:27:54 +0200 Subject: [PATCH 33/45] check_item_key_is_1d_contiguous --- src/zarr/buffer.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 095cf05f49..0356568ff6 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -24,6 +24,15 @@ NDArrayLike: TypeAlias = np.ndarray +def check_item_key_is_1d_contiguous(key: Any) -> None: + if not isinstance(key, slice): + raise TypeError( + f"Item key has incorrect type (expected slice, got {key.__class__.__name__})" + ) + if not (key.step is None or key.step == 1): + raise ValueError("slice must be contiguous") + + class Factory: class Create(Protocol): def __call__( @@ -109,14 +118,16 @@ def to_bytes(self) -> bytes: def memoryview(self) -> memoryview: return memoryview(self._data) - def __getitem__(self, key: Any) -> Self: + def __getitem__(self, key: slice) -> Self: + check_item_key_is_1d_contiguous(key) return self.__class__(self._data.__getitem__(key)) - def __setitem__(self, key: Any, value: Any) -> None: + def __setitem__(self, key: slice, value: Any) -> None: + check_item_key_is_1d_contiguous(key) self._data.__setitem__(key, value) def __len__(self) -> int: - return self._data.nbytes + return self._data.size def __add__(self, other: Buffer) -> Self: return self.__class__(np.frombuffer(self.to_bytes() + other.to_bytes(), dtype="b")) From 01fcec1fe834f26f0ee91b29c8d6b33d52abbcab Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 16:22:50 +0200 Subject: [PATCH 34/45] Buffer.create_zero_length() --- src/zarr/buffer.py | 4 ++-- src/zarr/codecs/sharding.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 0356568ff6..f060a2649f 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -98,8 +98,8 @@ def __init__(self, array: NDArrayLike): self._data = array @classmethod - def create_empty(cls, *, nbytes: int) -> Self: - return cls(np.empty(shape=(nbytes,), dtype="b", order="C")) + def create_zero_length(cls) -> Self: + return cls(np.array([], dtype="b")) @classmethod def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 41902ffc13..b63d1e499b 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -148,7 +148,7 @@ async def from_bytes( def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardProxy: index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() - obj.buf = Buffer.create_empty(nbytes=0) + obj.buf = Buffer.create_zero_length() obj.index = index return obj @@ -190,7 +190,7 @@ def merge_with_morton_order( @classmethod def create_empty(cls, chunks_per_shard: ChunkCoords) -> _ShardBuilder: obj = cls() - obj.buf = Buffer.create_empty(nbytes=0) + obj.buf = Buffer.create_zero_length() obj.index = _ShardIndex.create_empty(chunks_per_shard) return obj From 9cc6edc099eb2a796b7472a78c32aa607ca92589 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Tue, 14 May 2024 16:39:52 +0200 Subject: [PATCH 35/45] Buffer.__add__(): use concat --- src/zarr/buffer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index f060a2649f..a30fcfdf38 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -130,7 +130,8 @@ def __len__(self) -> int: return self._data.size def __add__(self, other: Buffer) -> Self: - return self.__class__(np.frombuffer(self.to_bytes() + other.to_bytes(), dtype="b")) + assert other._data.dtype == np.dtype("b") + return self.__class__(np.concatenate((self._data, other._data))) def __eq__(self, other: Any) -> bool: if isinstance(other, (bytes, bytearray)): From 40a30f1671d063e59c17660981c98800a3f70a15 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 16:54:27 +0200 Subject: [PATCH 36/45] Buffer.as_ndarray_like --- src/zarr/buffer.py | 46 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index a30fcfdf38..8e6a40a14b 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -102,17 +102,39 @@ def create_zero_length(cls) -> Self: return cls(np.array([], dtype="b")) @classmethod - def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: - return cls(np.asarray(array_like).reshape(-1).view(dtype="b")) + def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: + return cls(ndarray_like) @classmethod def from_bytes(cls, data: BytesLike) -> Self: - return cls.from_numpy_array(np.frombuffer(data, dtype="b")) + return cls.from_ndarray_like(np.frombuffer(data, dtype="b")) + + def as_ndarray_like(self) -> NDArrayLike: + """Return the underlying array that represents the memory of this buffer + + This will never copy data. + + Return + ------ + The underlying 1d array such as a NumPy or CuPy array. + """ + return self._data def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: return NDBuffer(self._data.view(dtype=dtype)) def to_bytes(self) -> bytes: + """Return the buffer as `bytes` (host memory). + + Warning + ------- + Will always copy data, only use this method for small buffers such + as metadata. If possible, use `.as_ndarray_like()` instead. + + Return + ------ + `bytes` of this buffer (data copy) + """ return bytes(self.memoryview()) def memoryview(self) -> memoryview: @@ -130,12 +152,17 @@ def __len__(self) -> int: return self._data.size def __add__(self, other: Buffer) -> Self: - assert other._data.dtype == np.dtype("b") - return self.__class__(np.concatenate((self._data, other._data))) + other_array = other.as_ndarray_like() + assert other_array.dtype == np.dtype("b") + return self.__class__(np.concatenate((self._data, other_array))) def __eq__(self, other: Any) -> bool: if isinstance(other, (bytes, bytearray)): - return self.to_bytes() == other + # Many of the tests compares `Buffer` with `bytes` so we + # convert the bytes to a Buffer and try again + return self == self.from_bytes(other) + if isinstance(other, Buffer): + return (self._data == other.as_ndarray_like()).all() raise ValueError( f"equal operator not supported between {self.__class__} and {other.__class__}" ) @@ -144,7 +171,7 @@ def __eq__(self, other: Any) -> bool: class NDBuffer: """A n-dimensional memory block - We use `NDBuffer` throughout Zarr to represent a block of memory. + We use `NDBuffer` throughout Zarr to represent a n-dimensional memory block. 
For now, we only support host memory but the plan is to support other types of memory such as CUDA device memory. """ @@ -177,7 +204,7 @@ def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: return cls(ndarray_like) def as_ndarray_like(self) -> NDArrayLike: - """Return the underlying array instance representing the memory of this buffer + """Return the underlying array that represents the memory of this buffer This will never copy data. @@ -195,7 +222,8 @@ def as_numpy_array(self) -> np.ndarray: Warning ------- - Might have to copy data, only use this method for small buffers such as metadata + Might have to copy data, only use this method for small buffers such + as metadata. If possible, use `.as_ndarray_like()` instead. Return ------ From 2421c5e7fbb555b0f27ad53a528fff3365d78a17 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 17:09:54 +0200 Subject: [PATCH 37/45] Buffer.as_numpy_array --- src/zarr/buffer.py | 35 ++++++++++++++++++++++------------- src/zarr/codecs/blosc.py | 2 +- src/zarr/codecs/crc32c_.py | 7 ++++--- src/zarr/store/local.py | 4 ++-- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 8e6a40a14b..45fe27b71e 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -109,8 +109,11 @@ def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: def from_bytes(cls, data: BytesLike) -> Self: return cls.from_ndarray_like(np.frombuffer(data, dtype="b")) + def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: + return NDBuffer(self._data.view(dtype=dtype)) + def as_ndarray_like(self) -> NDArrayLike: - """Return the underlying array that represents the memory of this buffer + """Return the underlying array (host or device memory) of this buffer This will never copy data. @@ -120,25 +123,32 @@ def as_ndarray_like(self) -> NDArrayLike: """ return self._data - def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: - return NDBuffer(self._data.view(dtype=dtype)) + def as_numpy_array(self) -> np.ndarray: + """Return the buffer as a NumPy array (host memory). + + Warning + ------- + Might have to copy data, consider using `.as_ndarray_like()` instead. + + Return + ------ + NumPy array of this buffer (might be a data copy) + """ + return self._data def to_bytes(self) -> bytes: """Return the buffer as `bytes` (host memory). Warning ------- - Will always copy data, only use this method for small buffers such - as metadata. If possible, use `.as_ndarray_like()` instead. + Will always copy data, only use this method for small buffers such as meta- + data. If possible, use `.as_numpy_array()` or `.as_ndarray_like()` instead. Return ------ `bytes` of this buffer (data copy) """ - return bytes(self.memoryview()) - - def memoryview(self) -> memoryview: - return memoryview(self._data) + return bytes(self.as_numpy_array()) def __getitem__(self, key: slice) -> Self: check_item_key_is_1d_contiguous(key) @@ -204,7 +214,7 @@ def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: return cls(ndarray_like) def as_ndarray_like(self) -> NDArrayLike: - """Return the underlying array that represents the memory of this buffer + """Return the underlying array (host or device memory) of this buffer This will never copy data. @@ -218,12 +228,11 @@ def as_buffer(self) -> Buffer: return Buffer(self._data.reshape(-1).view(dtype="b")) def as_numpy_array(self) -> np.ndarray: - """Return the buffer as a NumPy array. + """Return the buffer as a NumPy array (host memory). 
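On the NDBuffer side, a short sketch of the accessor contract being documented here (again assuming the default NumPy backing):

    import numpy as np
    from zarr.buffer import NDBuffer

    nd = NDBuffer.from_numpy_array(np.arange(6, dtype="int32").reshape(2, 3))

    # as_ndarray_like() hands back the backing array itself, never a copy.
    assert nd.as_ndarray_like() is nd.as_ndarray_like()

    # as_buffer() reinterprets the same memory as a flat byte Buffer.
    flat = nd.as_buffer()
    assert len(flat) == 6 * 4   # 6 int32 elements, 24 bytes
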
Warning ------- - Might have to copy data, only use this method for small buffers such - as metadata. If possible, use `.as_ndarray_like()` instead. + Might have to copy data, consider using `.as_ndarray_like()` instead. Return ------ diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index ff52dba061..e7b374ab1c 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -173,7 +173,7 @@ async def encode( # Since blosc only takes bytes, we convert the input and output of the encoding # between bytes and Buffer return await to_thread( - lambda chunk: Buffer.from_bytes(self._blosc_codec.encode(chunk.memoryview())), + lambda chunk: Buffer.from_bytes(self._blosc_codec.encode(chunk.as_ndarray_like())), chunk_bytes, ) diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index ac389c1406..07c38c30df 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -35,7 +35,7 @@ async def decode( chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Buffer: - data = chunk_bytes.memoryview() + data = chunk_bytes.to_bytes() crc32_bytes = data[-4:] inner_bytes = data[:-4] @@ -53,8 +53,9 @@ async def encode( chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Optional[Buffer]: - checksum = crc32c(chunk_bytes.memoryview()) - return Buffer.from_bytes(chunk_bytes.to_bytes() + np.uint32(checksum).tobytes()) + data = chunk_bytes.to_bytes() + checksum = crc32c(data) + return Buffer.from_bytes(data + np.uint32(checksum).tobytes()) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index 9b1dbab371..738be6dc59 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -58,10 +58,10 @@ def _put( if start is not None: with path.open("r+b") as f: f.seek(start) - f.write(value.memoryview()) + f.write(value.as_numpy_array()) return None else: - return path.write_bytes(value.memoryview()) + return path.write_bytes(value.as_numpy_array()) class LocalStore(Store): From 227c0d9179448b8ccf724ad9e8a885e88868f86a Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 17:24:28 +0200 Subject: [PATCH 38/45] crc32c: use as_numpy_array --- src/zarr/codecs/crc32c_.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index 07c38c30df..a20dde3770 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -35,7 +35,7 @@ async def decode( chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Buffer: - data = chunk_bytes.to_bytes() + data = chunk_bytes.as_numpy_array() crc32_bytes = data[-4:] inner_bytes = data[:-4] @@ -46,16 +46,18 @@ async def decode( "Stored and computed checksum do not match. " + f"Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." 
) - return Buffer.from_bytes(inner_bytes) + return Buffer.from_ndarray_like(inner_bytes) async def encode( self, chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Optional[Buffer]: - data = chunk_bytes.to_bytes() - checksum = crc32c(data) - return Buffer.from_bytes(data + np.uint32(checksum).tobytes()) + data = chunk_bytes.as_numpy_array() + # Calculate the checksum and "cast" it to a numpy array + checksum = np.array([crc32c(data)], dtype=np.uint32) + # Append the checksum (as bytes) to the data + return Buffer.from_ndarray_like(np.append(data, checksum.view("b"))) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 From c1c218537d3347395d8863e2e03035a7afe8799f Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 17:45:49 +0200 Subject: [PATCH 39/45] as_numpy_array_wrapper --- src/zarr/buffer.py | 28 ++++++++++++++++++++++---- src/zarr/codecs/blosc.py | 4 ++-- src/zarr/codecs/gzip.py | 6 +++--- src/zarr/codecs/zstd.py | 6 +++--- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 45fe27b71e..fe7a828c85 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -141,8 +141,8 @@ def to_bytes(self) -> bytes: Warning ------- - Will always copy data, only use this method for small buffers such as meta- - data. If possible, use `.as_numpy_array()` or `.as_ndarray_like()` instead. + Will always copy data, only use this method for small buffers such as metadata + buffers. If possible, use `.as_numpy_array()` or `.as_ndarray_like()` instead. Return ------ @@ -289,5 +289,25 @@ def transpose(self, *axes: np.SupportsIndex) -> Self: return self.__class__(self._data.transpose(*axes)) -def as_bytes_wrapper(func: Callable[[bytes], bytes], buf: Buffer) -> Buffer: - return Buffer.from_bytes(func(buf.to_bytes())) +def as_numpy_array_wrapper(func: Callable[[np.ndarray], bytes], buf: Buffer) -> Buffer: + """Converts the input of `func` to a numpy array and the output back to `Buffer`. + + This function is useful when calling a `func` that only supports host memory such + as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert + the input `buf` to a Numpy array and convert the result back into a `Buffer`. + + Parameters + ---------- + func + The callable that will be called with the converted `buf` as input. + `func` must return bytes, which will be converted into a `Buffer` + before being returned. + buf + The buffer that will be converted to a Numpy array before being given as + input to `func`.
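The checksum framing produced by the crc32c change above can be sketched as follows; it mirrors the codec's arithmetic and assumes only the `crc32c` function the codec already calls:

    import numpy as np
    from crc32c import crc32c

    payload = np.frombuffer(b"chunk-data", dtype="b")
    checksum = np.array([crc32c(payload)], dtype=np.uint32)
    framed = np.append(payload, checksum.view("b"))

    assert framed.nbytes == payload.nbytes + 4   # matches compute_encoded_size
    # decode recomputes the checksum over everything but the trailing 4 bytes
    assert crc32c(framed[:-4]) == framed[-4:].view(np.uint32)[0]
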
+ + Return + ------ + The result of `func` converted to a `Buffer` + """ + return Buffer.from_bytes(func(buf.as_numpy_array())) diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index e7b374ab1c..7334139acb 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -9,7 +9,7 @@ from numcodecs.blosc import Blosc from zarr.abc.codec import BytesBytesCodec -from zarr.buffer import Buffer, as_bytes_wrapper +from zarr.buffer import Buffer, as_numpy_array_wrapper from zarr.codecs.registry import register_codec from zarr.common import parse_enum, parse_named_configuration, to_thread @@ -163,7 +163,7 @@ async def decode( chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Buffer: - return await to_thread(as_bytes_wrapper, self._blosc_codec.decode, chunk_bytes) + return await to_thread(as_numpy_array_wrapper, self._blosc_codec.decode, chunk_bytes) async def encode( self, diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index cf36e8679d..a8d7f815aa 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -5,7 +5,7 @@ from numcodecs.gzip import GZip from zarr.abc.codec import BytesBytesCodec -from zarr.buffer import Buffer, as_bytes_wrapper +from zarr.buffer import Buffer, as_numpy_array_wrapper from zarr.codecs.registry import register_codec from zarr.common import parse_named_configuration, to_thread @@ -49,14 +49,14 @@ async def decode( chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Buffer: - return await to_thread(as_bytes_wrapper, GZip(self.level).decode, chunk_bytes) + return await to_thread(as_numpy_array_wrapper, GZip(self.level).decode, chunk_bytes) async def encode( self, chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Optional[Buffer]: - return await to_thread(as_bytes_wrapper, GZip(self.level).encode, chunk_bytes) + return await to_thread(as_numpy_array_wrapper, GZip(self.level).encode, chunk_bytes) def compute_encoded_size( self, diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 2f5ee8868b..0cc99a0368 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -6,7 +6,7 @@ from zstandard import ZstdCompressor, ZstdDecompressor from zarr.abc.codec import BytesBytesCodec -from zarr.buffer import Buffer, as_bytes_wrapper +from zarr.buffer import Buffer, as_numpy_array_wrapper from zarr.codecs.registry import register_codec from zarr.common import parse_named_configuration, to_thread @@ -65,14 +65,14 @@ async def decode( chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Buffer: - return await to_thread(as_bytes_wrapper, self._decompress, chunk_bytes) + return await to_thread(as_numpy_array_wrapper, self._decompress, chunk_bytes) async def encode( self, chunk_bytes: Buffer, _chunk_spec: ArraySpec, ) -> Optional[Buffer]: - return await to_thread(as_bytes_wrapper, self._compress, chunk_bytes) + return await to_thread(as_numpy_array_wrapper, self._compress, chunk_bytes) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError From 275cd6c2c3621f5f8c3430cb2634fc240e0d5405 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Tue, 14 May 2024 17:57:56 +0200 Subject: [PATCH 40/45] fix import --- src/zarr/buffer.py | 13 ++----------- tests/v3/test_buffer.py | 6 +++++- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index fe7a828c85..60a92feba7 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,17 +1,8 @@ from __future__ import annotations import sys -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Iterable, - Literal, - Optional, - Protocol, - Tuple, - TypeAlias, -) +from typing import (TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, + Protocol, Tuple, TypeAlias) import numpy as np diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index edb3e40a35..c93d29f452 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -1,4 +1,5 @@ -from typing import Any, Iterable, Literal, Optional, Self +from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional + import numpy as np import numpy.typing as npt import pytest @@ -8,6 +9,9 @@ from zarr.store.core import StorePath from zarr.store.memory import MemoryStore +if TYPE_CHECKING: + from typing_extensions import Self + class MyNDArrayLike(np.ndarray): """An example of a ndarray-like class""" From 91809e546350cca23497b60868fc03844156dd39 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 14 May 2024 18:00:27 +0200 Subject: [PATCH 41/45] use from __future__ import annotations --- src/zarr/buffer.py | 13 +++++++++++-- tests/v3/test_buffer.py | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 60a92feba7..fe7a828c85 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,8 +1,17 @@ from __future__ import annotations import sys -from typing import (TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, - Protocol, Tuple, TypeAlias) +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Literal, + Optional, + Protocol, + Tuple, + TypeAlias, +) import numpy as np diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index c93d29f452..a56c768782 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional import numpy as np From b5eec5ddb88ee9a0b07db28da8b6a50b7db06aaa Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 15 May 2024 09:58:28 +0200 Subject: [PATCH 42/45] doc and clean up --- src/zarr/buffer.py | 177 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 155 insertions(+), 22 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index fe7a828c85..e6b75886c7 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -25,6 +25,7 @@ def check_item_key_is_1d_contiguous(key: Any) -> None: + """Raises error if `key` isn't a 1d contiguous slice""" if not isinstance(key, slice): raise TypeError( f"Item key has incorrect type (expected slice, got {key.__class__.__name__})" @@ -86,31 +87,70 @@ def __call__(self, ndarray_like: NDArrayLike) -> NDBuffer: class Buffer: """A flat contiguous memory block - We use `Buffer` throughout Zarr to represent a contiguous block of memory. - For now, we only support host memory but the plan is to support other types - of memory such as CUDA device memory. + We use Buffer throughout Zarr to represent a contiguous block of memory. + + A Buffer is backed by a underlying ndarray-like instance that represents + the memory. 
The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Note + ---- + This buffer is untyped, so all indexing and sizes are in bytes. + + Parameters + ---------- + ndarray_like + ndarray-like object that must be 1-dim, contiguous, and byte dtype. """ - def __init__(self, array: NDArrayLike): - assert array.ndim == 1 - assert array.itemsize == 1 - assert array.dtype == np.dtype("b") - self._data = array + def __init__(self, ndarray_like: NDArrayLike): + if ndarray_like.ndim != 1: + raise ValueError("ndarray_like: only 1-dim allowed") + if ndarray_like.dtype != np.dtype("b"): + raise ValueError("ndarray_like: only byte dtype allowed") + self._data = ndarray_like @classmethod def create_zero_length(cls) -> Self: + """Create an empty buffer with length zero + + Return + ------ + New empty 0-length buffer + """ return cls(np.array([], dtype="b")) @classmethod def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: + """Create a new buffer of a ndarray-like object + + Parameters + ---------- + ndarray_like + ndarray-like object that must be 1-dim, contiguous, and byte dtype. + + Return + ------ + New buffer representing `ndarray_like` + """ return cls(ndarray_like) @classmethod - def from_bytes(cls, data: BytesLike) -> Self: - return cls.from_ndarray_like(np.frombuffer(data, dtype="b")) + def from_bytes(cls, bytes_like: BytesLike) -> Self: + """Create a new buffer of a bytes-like object (host memory) - def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: - return NDBuffer(self._data.view(dtype=dtype)) + Parameters + ---------- + bytes_like + bytes-like object + + Return + ------ + New buffer representing `bytes_like` + """ + return cls.from_ndarray_like(np.frombuffer(bytes_like, dtype="b")) def as_ndarray_like(self) -> NDArrayLike: """Return the underlying array (host or device memory) of this buffer @@ -123,6 +163,22 @@ def as_ndarray_like(self) -> NDArrayLike: """ return self._data + def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: + """Create a new NDBuffer from this one. + + This will never copy data. + + Parameters + ---------- + dtype + The datatype of the returned buffer (reinterpretation of the bytes) + + Return + ------ + New NDbuffer representing `self.as_ndarray_like()` + """ + return NDBuffer.from_ndarray_like(self._data.view(dtype=dtype)) + def as_numpy_array(self) -> np.ndarray: """Return the buffer as a NumPy array (host memory). @@ -134,7 +190,7 @@ def as_numpy_array(self) -> np.ndarray: ------ NumPy array of this buffer (might be a data copy) """ - return self._data + return np.asanyarray(self._data) def to_bytes(self) -> bytes: """Return the buffer as `bytes` (host memory). @@ -162,6 +218,8 @@ def __len__(self) -> int: return self._data.size def __add__(self, other: Buffer) -> Self: + """Concatenate two buffers""" + other_array = other.as_ndarray_like() assert other_array.dtype == np.dtype("b") return self.__class__(np.concatenate((self._data, other_array))) @@ -181,9 +239,26 @@ def __eq__(self, other: Any) -> bool: class NDBuffer: """A n-dimensional memory block - We use `NDBuffer` throughout Zarr to represent a n-dimensional memory block. - For now, we only support host memory but the plan is to support other types - of memory such as CUDA device memory. + We use NDBuffer throughout Zarr to represent a n-dimensional memory block. 
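With the constructor validation added here, misuse surfaces as ordinary exceptions rather than failed asserts; a short sketch (NumPy-backed default assumed):

    import numpy as np
    from zarr.buffer import Buffer

    Buffer(np.zeros(4, dtype="b"))      # accepted: 1-dim, byte dtype
    for bad in (
        np.zeros((2, 2), dtype="b"),    # rejected: not 1-dim
        np.zeros(4, dtype="int32"),     # rejected: not byte dtype
    ):
        try:
            Buffer(bad)
        except ValueError:
            pass
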
+ + An NDBuffer is backed by an underlying ndarray-like instance that represents + the memory. The memory type is unspecified; can be regular host memory, + CUDA device memory, or something else. The only requirement is that the + ndarray-like instance can be copied/converted to a regular Numpy array + (host memory). + + Note + ---- + The two buffer classes Buffer and NDBuffer are very similar. In fact, + Buffer is a special case of NDBuffer where dim=1, stride=1, and dtype="b". + However, in order to use the Python's type system to differentiate between + the flat contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep + the definition of the two classes separate. + + Parameters + ---------- + ndarray_like + ndarray-like object that is convertible to a regular Numpy array. """ def __init__(self, array: NDArrayLike): @@ -200,19 +275,64 @@ def create( order: Literal["C", "F"] = "C", fill_value: Optional[Any] = None, ) -> Self: + """Create a new buffer and its underlying ndarray-like object + + Parameters + ---------- + shape + The shape of the buffer and its underlying ndarray-like object + dtype + The datatype of the buffer and its underlying ndarray-like object + order + Whether to store multi-dimensional data in row-major (C-style) or + column-major (Fortran-style) order in memory. + fill_value + If not None, fill the new buffer with a scalar value. + + Return + ------ + New buffer representing a new ndarray_like object + + Developer Notes + --------------- + A subclass can override this method to create an ndarray-like object + other than the default Numpy array. + """ ret = cls(np.empty(shape=shape, dtype=dtype, order=order)) if fill_value is not None: ret.fill(fill_value) return ret - @classmethod - def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: - return cls(np.asanyarray(array_like)) - @classmethod def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: + """Create a new buffer of an ndarray-like object + + Parameters + ---------- + ndarray_like + ndarray-like object + + Return + ------ + New buffer representing `ndarray_like` + """ return cls(ndarray_like) + @classmethod + def from_numpy_array(cls, array_like: np.ArrayLike) -> Self: + """Create a new buffer of a Numpy array-like object + + Parameters + ---------- + array_like + Object that can be coerced into a Numpy array + + Return + ------ + New buffer representing `array_like` + """ + return cls.from_ndarray_like(np.asanyarray(array_like)) + def as_ndarray_like(self) -> NDArrayLike: """Return the underlying array (host or device memory) of this buffer @@ -225,7 +345,20 @@ def as_ndarray_like(self) -> NDArrayLike: return self._data def as_buffer(self) -> Buffer: - return Buffer(self._data.reshape(-1).view(dtype="b")) + """Create a new Buffer from this one. + + Warning + ------- + Copies data if the buffer is non-contiguous. + + Return + ------ + The new buffer (might be data copy) + """ + data = self._data + if not self._data.flags.contiguous: + data = np.ascontiguousarray(self._data) + return Buffer(data.reshape(-1).view(dtype="b")) # Flatten the array without copy def as_numpy_array(self) -> np.ndarray: """Return the buffer as a NumPy array (host memory). @@ -238,7 +371,7 @@ def as_numpy_array(self) -> np.ndarray: ------ NumPy array of this buffer (might be a data copy) """ - return self._data + return np.asanyarray(self._data) @property def dtype(self) -> np.dtype[Any]: From 197b9b086f5cfa1dd1bdc98b2b19f4adb0b386ba Mon Sep 17 00:00:00 2001 From: "Mads R. B.
Kristensen" Date: Wed, 15 May 2024 11:26:51 +0200 Subject: [PATCH 43/45] doc --- src/zarr/buffer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index e6b75886c7..615bc4d579 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -249,11 +249,11 @@ class NDBuffer: Note ---- - The two buffer classes Buffer and NDBuffer are very similar. In fact, - Buffer is a special case of NDBuffer where dim=1, stride=1, and dtype="b". - However, in order to use the Python's type system to differentiate between - the flat contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep - the definition of the two classes separate. + The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer + is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + in order to use Python's type system to differentiate between the contiguous + Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the + two classes separate. Parameters ---------- From b5f87f1a8d1505dbe893e477d9ce85ff744aec52 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 15 May 2024 12:22:06 +0200 Subject: [PATCH 44/45] Apply suggestions from code review Co-authored-by: Norman Rzepka --- src/zarr/buffer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 615bc4d579..6ab9b454e0 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -46,7 +46,7 @@ def __call__( ) -> NDBuffer: """Factory function to create a new NDBuffer (or subclass) - Callables implementing the `Factor.Create` protocol must create a new + Callables implementing the `Factory.Create` protocol must create a new instance of NDBuffer (or subclass) given the following parameters. Parameters @@ -70,7 +70,7 @@ class NDArrayLike(Protocol): def __call__(self, ndarray_like: NDArrayLike) -> NDBuffer: """Factory function to coerce an array into a NDBuffer (or subclass) - Callables implementing the `Factor.NDArrayLike` protocol must return + Callables implementing the `Factory.NDArrayLike` protocol must return an instance of NDBuffer (or subclass) given an ndarray-like object. Parameters From 3854becbdffab97bb7eb7acf3580b0d4a4648761 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 15 May 2024 14:40:50 +0200 Subject: [PATCH 45/45] Buffer is now backed by ArrayLike --- src/zarr/buffer.py | 50 ++++++++++++++++++++------------------ src/zarr/codecs/blosc.py | 2 +- src/zarr/codecs/crc32c_.py | 4 +-- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 6ab9b454e0..a633cc09ec 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -20,7 +20,9 @@ from zarr.codecs.bytes import Endian from zarr.common import BytesLike -# TODO: create a protocol for the attributes we need, for now we just aliasing numpy +# TODO: create a protocol for the attributes we need, for now we alias Numpy's ndarray +# both for the array-like and ndarray-like +ArrayLike: TypeAlias = np.ndarray NDArrayLike: TypeAlias = np.ndarray @@ -89,10 +91,10 @@ class Buffer: We use Buffer throughout Zarr to represent a contiguous block of memory. - A Buffer is backed by a underlying ndarray-like instance that represents + A Buffer is backed by a underlying array-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. 
The only requirement is that the - ndarray-like instance can be copied/converted to a regular Numpy array + array-like instance can be copied/converted to a regular Numpy array (host memory). Note @@ -101,16 +103,16 @@ class Buffer: Parameters ---------- - ndarray_like - ndarray-like object that must be 1-dim, contiguous, and byte dtype. + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. """ - def __init__(self, ndarray_like: NDArrayLike): - if ndarray_like.ndim != 1: - raise ValueError("ndarray_like: only 1-dim allowed") - if ndarray_like.dtype != np.dtype("b"): - raise ValueError("ndarray_like: only byte dtype allowed") - self._data = ndarray_like + def __init__(self, array_like: ArrayLike): + if array_like.ndim != 1: + raise ValueError("array_like: only 1-dim allowed") + if array_like.dtype != np.dtype("b"): + raise ValueError("array_like: only byte dtype allowed") + self._data = array_like @classmethod def create_zero_length(cls) -> Self: @@ -123,19 +125,19 @@ def create_zero_length(cls) -> Self: return cls(np.array([], dtype="b")) @classmethod - def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: - """Create a new buffer of a ndarray-like object + def from_array_like(cls, array_like: NDArrayLike) -> Self: + """Create a new buffer of a array-like object Parameters ---------- - ndarray_like - ndarray-like object that must be 1-dim, contiguous, and byte dtype. + array_like + array-like object that must be 1-dim, contiguous, and byte dtype. Return ------ - New buffer representing `ndarray_like` + New buffer representing `array_like` """ - return cls(ndarray_like) + return cls(array_like) @classmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: @@ -150,9 +152,9 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: ------ New buffer representing `bytes_like` """ - return cls.from_ndarray_like(np.frombuffer(bytes_like, dtype="b")) + return cls.from_array_like(np.frombuffer(bytes_like, dtype="b")) - def as_ndarray_like(self) -> NDArrayLike: + def as_array_like(self) -> NDArrayLike: """Return the underlying array (host or device memory) of this buffer This will never copy data. @@ -175,7 +177,7 @@ def as_nd_buffer(self, *, dtype: np.DTypeLike) -> NDBuffer: Return ------ - New NDbuffer representing `self.as_ndarray_like()` + New NDbuffer representing `self.as_array_like()` """ return NDBuffer.from_ndarray_like(self._data.view(dtype=dtype)) @@ -184,7 +186,7 @@ def as_numpy_array(self) -> np.ndarray: Warning ------- - Might have to copy data, consider using `.as_ndarray_like()` instead. + Might have to copy data, consider using `.as_array_like()` instead. Return ------ @@ -198,7 +200,7 @@ def to_bytes(self) -> bytes: Warning ------- Will always copy data, only use this method for small buffers such as metadata - buffers. If possible, use `.as_numpy_array()` or `.as_ndarray_like()` instead. + buffers. If possible, use `.as_numpy_array()` or `.as_array_like()` instead. 
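After this rename a Buffer is constructed from, and hands back, an ArrayLike; a sketch of the new names (NumPy default assumed):

    import numpy as np
    from zarr.buffer import Buffer

    buf = Buffer.from_array_like(np.frombuffer(b"xyz", dtype="b"))
    assert isinstance(buf.as_array_like(), np.ndarray)   # zero-copy accessor
    assert buf.to_bytes() == b"xyz"                      # always copies to host bytes
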
Return ------ @@ -220,7 +222,7 @@ def __len__(self) -> int: def __add__(self, other: Buffer) -> Self: """Concatenate two buffers""" - other_array = other.as_ndarray_like() + other_array = other.as_array_like() assert other_array.dtype == np.dtype("b") return self.__class__(np.concatenate((self._data, other_array))) @@ -230,7 +232,7 @@ def __eq__(self, other: Any) -> bool: # convert the bytes to a Buffer and try again return self == self.from_bytes(other) if isinstance(other, Buffer): - return (self._data == other.as_ndarray_like()).all() + return (self._data == other.as_array_like()).all() raise ValueError( f"equal operator not supported between {self.__class__} and {other.__class__}" ) diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 7334139acb..7e94575f9a 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -173,7 +173,7 @@ async def encode( # Since blosc only takes bytes, we convert the input and output of the encoding # between bytes and Buffer return await to_thread( - lambda chunk: Buffer.from_bytes(self._blosc_codec.encode(chunk.as_ndarray_like())), + lambda chunk: Buffer.from_bytes(self._blosc_codec.encode(chunk.as_array_like())), chunk_bytes, ) diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index a20dde3770..1daf512e43 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -46,7 +46,7 @@ async def decode( "Stored and computed checksum do not match. " + f"Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." ) - return Buffer.from_ndarray_like(inner_bytes) + return Buffer.from_array_like(inner_bytes) async def encode( self, @@ -57,7 +57,7 @@ async def encode( # Calculate the checksum and "cast" it to a numpy array checksum = np.array([crc32c(data)], dtype=np.uint32) # Append the checksum (as bytes) to the data - return Buffer.from_ndarray_like(np.append(data, checksum.view("b"))) + return Buffer.from_array_like(np.append(data, checksum.view("b"))) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4
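Finally, a sketch of the Buffer/NDBuffer round-trip these patches converge on, of the kind the codec pipeline performs (NumPy-backed defaults assumed):

    import numpy as np
    from zarr.buffer import Buffer, NDBuffer

    nd = NDBuffer.from_numpy_array(np.arange(4, dtype="uint16"))
    flat = nd.as_buffer()                      # untyped view of the same memory
    back = flat.as_nd_buffer(dtype="uint16")   # reinterpret the bytes, no copy
    assert (back.as_numpy_array() == np.arange(4, dtype="uint16")).all()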