Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v3] First step to generalize ndarray and bytes #1826

Merged
merged 53 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
e642337
use Buffer
madsbk Apr 30, 2024
23a71a7
use memoryview as the underlying memory
madsbk Apr 30, 2024
eb6d097
use NDBuffer
madsbk May 1, 2024
389b41e
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 2, 2024
2982c9b
convert to Buffer for the v2 tests
madsbk May 2, 2024
45ad254
clean up
madsbk May 2, 2024
95ae12a
Merge branch 'v3' into buffer
madsbk May 6, 2024
d848bbe
spilling
madsbk May 6, 2024
71dcff1
remove return_as_bytes_wrapper
madsbk May 6, 2024
48edc4e
remove as_ndarray
madsbk May 6, 2024
5a83442
doc
madsbk May 6, 2024
e6d49f3
clean up
madsbk May 6, 2024
0c8b677
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 8, 2024
009ad29
as_buffer(): handle bytes like
madsbk May 8, 2024
ee0a69a
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 13, 2024
c189a4f
removed sync.py again
madsbk May 13, 2024
7cb9346
separate Buffer and NDBuffer
madsbk May 13, 2024
2ba8510
impl. NDBuffer.from_numpy_array()
madsbk May 13, 2024
fccd956
remove as_buffer()
madsbk May 13, 2024
962d729
remove Buffer.as_numpy_array()
madsbk May 13, 2024
12de6c2
impl. NDBuffer.as_buffer()
madsbk May 13, 2024
36a0d98
reduce the use of as_numpy_array()
madsbk May 13, 2024
43ebafe
impl. and use NDBuffer.all_equal
madsbk May 13, 2024
d01557e
as_numpy_array(): doc
madsbk May 13, 2024
c74f266
remove as_bytearray()
madsbk May 13, 2024
6fce5a9
impl. Buffer.from_numpy_array()
madsbk May 13, 2024
c37312b
NDArrayLike
madsbk May 13, 2024
925fa59
Factory.Create
madsbk May 13, 2024
1bbeefc
Factory.FromNumpy
madsbk May 13, 2024
1159567
doc
madsbk May 13, 2024
2a922d2
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 13, 2024
26d6708
doc
madsbk May 13, 2024
5ce21a0
remove the buffer factories again
madsbk May 13, 2024
be9dce3
NDBuffer.create(): take fill_value
madsbk May 14, 2024
749d0c5
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 14, 2024
57e3dd6
getitem and setitem now use factory
madsbk May 14, 2024
8bbe5c1
doc
madsbk May 14, 2024
1c64b79
test
madsbk May 14, 2024
cd7eb44
check_item_key_is_1d_contiguous
madsbk May 14, 2024
01fcec1
Buffer.create_zero_length()
madsbk May 14, 2024
65c9b90
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 14, 2024
9cc6edc
Buffer.__add__(): use concat
madsbk May 14, 2024
40a30f1
Buffer.as_ndarray_like
madsbk May 14, 2024
2421c5e
Buffer.as_numpy_array
madsbk May 14, 2024
227c0d9
crc32c: use as_numpy_array
madsbk May 14, 2024
c1c2185
as_numpy_array_wrapper
madsbk May 14, 2024
275cd6c
fix import
madsbk May 14, 2024
91809e5
use from __future__ import annotations
madsbk May 14, 2024
b5eec5d
doc and clean up
madsbk May 15, 2024
197b9b0
doc
madsbk May 15, 2024
b5f87f1
Apply suggestions from code review
madsbk May 15, 2024
3854bec
Buffer is now backed by ArrayLike
madsbk May 15, 2024
e780279
Merge branch 'v3' of github.com:zarr-developers/zarr-python into buffer
madsbk May 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions src/zarr/abc/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
from abc import abstractmethod
from typing import TYPE_CHECKING, Optional

import numpy as np
from zarr.abc.metadata import Metadata

from zarr.buffer import Buffer, NDBuffer
from zarr.common import ArraySpec
from zarr.store import StorePath


if TYPE_CHECKING:
from typing_extensions import Self
from zarr.common import BytesLike, SliceSelection
from zarr.common import SliceSelection
from zarr.metadata import ArrayMetadata


Expand All @@ -37,35 +37,35 @@ class ArrayArrayCodec(Codec):
@abstractmethod
async def decode(
self,
chunk_array: np.ndarray,
chunk_array: NDBuffer,
chunk_spec: ArraySpec,
) -> np.ndarray:
) -> NDBuffer:
pass

@abstractmethod
async def encode(
self,
chunk_array: np.ndarray,
chunk_array: NDBuffer,
chunk_spec: ArraySpec,
) -> Optional[np.ndarray]:
) -> Optional[NDBuffer]:
pass


class ArrayBytesCodec(Codec):
@abstractmethod
async def decode(
self,
chunk_array: BytesLike,
chunk_array: Buffer,
chunk_spec: ArraySpec,
) -> np.ndarray:
) -> NDBuffer:
pass

@abstractmethod
async def encode(
self,
chunk_array: np.ndarray,
chunk_array: NDBuffer,
chunk_spec: ArraySpec,
) -> Optional[BytesLike]:
) -> Optional[Buffer]:
pass


Expand All @@ -76,7 +76,7 @@ async def decode_partial(
store_path: StorePath,
selection: SliceSelection,
chunk_spec: ArraySpec,
) -> Optional[np.ndarray]:
) -> Optional[NDBuffer]:
pass


Expand All @@ -85,7 +85,7 @@ class ArrayBytesCodecPartialEncodeMixin:
async def encode_partial(
self,
store_path: StorePath,
chunk_array: np.ndarray,
chunk_array: NDBuffer,
selection: SliceSelection,
chunk_spec: ArraySpec,
) -> None:
Expand All @@ -96,15 +96,15 @@ class BytesBytesCodec(Codec):
@abstractmethod
async def decode(
self,
chunk_array: BytesLike,
chunk_array: Buffer,
chunk_spec: ArraySpec,
) -> BytesLike:
) -> Buffer:
pass

@abstractmethod
async def encode(
self,
chunk_array: BytesLike,
chunk_array: Buffer,
chunk_spec: ArraySpec,
) -> Optional[BytesLike]:
) -> Optional[Buffer]:
pass
13 changes: 7 additions & 6 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from collections.abc import AsyncGenerator
from typing import List, Tuple, Optional

from zarr.buffer import Buffer


class Store(ABC):
@abstractmethod
async def get(
self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None
) -> Optional[bytes]:
) -> Optional[Buffer]:
"""Retrieve the value associated with a given key.

Parameters
Expand All @@ -18,14 +20,14 @@ async def get(

Returns
-------
bytes
Buffer
"""
...

@abstractmethod
async def get_partial_values(
self, key_ranges: List[Tuple[str, Tuple[int, int]]]
) -> List[Optional[bytes]]:
) -> List[Optional[Buffer]]:
"""Retrieve possibly partial values from given key_ranges.

Parameters
Expand All @@ -35,8 +37,7 @@ async def get_partial_values(

Returns
-------
list[bytes]
list of values, in the order of the key_ranges, may contain null/none for missing keys
list of values, in the order of the key_ranges, may contain null/none for missing keys
"""
...

Expand All @@ -61,7 +62,7 @@ def supports_writes(self) -> bool:
...

@abstractmethod
async def set(self, key: str, value: bytes) -> None:
async def set(self, key: str, value: Buffer) -> None:
"""Store a (key, value) pair.

Parameters
Expand Down
62 changes: 33 additions & 29 deletions src/zarr/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@


# from zarr.array_v2 import ArrayV2
from zarr.buffer import Buffer, Factory, NDArrayLike, NDBuffer
from zarr.codecs import BytesCodec
from zarr.codecs.pipeline import CodecPipeline
from zarr.common import (
Expand Down Expand Up @@ -145,7 +146,7 @@ async def open(
assert zarr_json_bytes is not None
return cls.from_dict(
store_path,
json.loads(zarr_json_bytes),
json.loads(zarr_json_bytes.to_bytes()),
)

@classmethod
Expand All @@ -158,7 +159,7 @@ async def open_auto(
if v3_metadata_bytes is not None:
return cls.from_dict(
store_path,
json.loads(v3_metadata_bytes),
json.loads(v3_metadata_bytes.to_bytes()),
)
else:
raise ValueError("no v2 support yet")
Expand All @@ -184,7 +185,9 @@ def dtype(self) -> np.dtype[Any]:
def attrs(self) -> dict[str, Any]:
return self.metadata.attributes

async def getitem(self, selection: Selection) -> npt.NDArray[Any]:
async def getitem(
self, selection: Selection, *, factory: Factory.Create = NDBuffer.create
) -> NDArrayLike:
assert isinstance(self.metadata.chunk_grid, RegularChunkGrid)
indexer = BasicIndexer(
selection,
Expand All @@ -193,10 +196,8 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]:
)

# setup output array
out = np.zeros(
indexer.shape,
dtype=self.metadata.dtype,
order=self.order,
out = factory(
shape=indexer.shape, dtype=self.metadata.dtype, order=self.order, fill_value=0
)

# reading chunks and decoding them
Expand All @@ -208,21 +209,17 @@ async def getitem(self, selection: Selection) -> npt.NDArray[Any]:
self._read_chunk,
config.get("async.concurrency"),
)

if out.shape:
return out
else:
return out[()]
return out.as_ndarray_like()

async def _save_metadata(self) -> None:
await (self.store_path / ZARR_JSON).set(self.metadata.to_bytes())
await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(self.metadata.to_bytes()))

async def _read_chunk(
self,
chunk_coords: ChunkCoords,
chunk_selection: SliceSelection,
out_selection: SliceSelection,
out: npt.NDArray[Any],
out: NDBuffer,
) -> None:
chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self.order)
chunk_key_encoding = self.metadata.chunk_key_encoding
Expand All @@ -244,7 +241,12 @@ async def _read_chunk(
else:
out[out_selection] = self.metadata.fill_value

async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None:
async def setitem(
self,
selection: Selection,
value: NDArrayLike,
factory: Factory.NDArrayLike = NDBuffer.from_ndarray_like,
) -> None:
assert isinstance(self.metadata.chunk_grid, RegularChunkGrid)
chunk_shape = self.metadata.chunk_grid.chunk_shape
indexer = BasicIndexer(
Expand All @@ -257,15 +259,19 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None:

# check value shape
if np.isscalar(value):
# setting a scalar value
pass
value = np.asanyarray(value)
else:
if not hasattr(value, "shape"):
value = np.asarray(value, self.metadata.dtype)
assert value.shape == sel_shape
if value.dtype.name != self.metadata.dtype.name:
value = value.astype(self.metadata.dtype, order="A")

# We accept any ndarray like object from the user and convert it
# to a NDBuffer (or subclass). From this point onwards, we only pass
# Buffer and NDBuffer between components.
value = factory(value)

# merging with existing data and encoding chunks
await concurrent_map(
[
Expand All @@ -284,7 +290,7 @@ async def setitem(self, selection: Selection, value: npt.NDArray[Any]) -> None:

async def _write_chunk(
self,
value: npt.NDArray[Any],
value: NDBuffer,
chunk_shape: ChunkCoords,
chunk_coords: ChunkCoords,
chunk_selection: SliceSelection,
Expand All @@ -298,11 +304,9 @@ async def _write_chunk(
if is_total_slice(chunk_selection, chunk_shape):
# write entire chunks
if np.isscalar(value):
chunk_array = np.empty(
chunk_shape,
dtype=self.metadata.dtype,
chunk_array = NDBuffer.create(
shape=chunk_shape, dtype=self.metadata.dtype, fill_value=value
)
chunk_array.fill(value)
else:
chunk_array = value[out_selection]
await self._write_chunk_to_store(store_path, chunk_array, chunk_spec)
Expand All @@ -322,11 +326,11 @@ async def _write_chunk(

# merge new value
if chunk_bytes is None:
chunk_array = np.empty(
chunk_shape,
chunk_array = NDBuffer.create(
shape=chunk_shape,
dtype=self.metadata.dtype,
fill_value=self.metadata.fill_value,
)
chunk_array.fill(self.metadata.fill_value)
else:
chunk_array = (
await self.codecs.decode(chunk_bytes, chunk_spec)
Expand All @@ -336,9 +340,9 @@ async def _write_chunk(
await self._write_chunk_to_store(store_path, chunk_array, chunk_spec)

async def _write_chunk_to_store(
self, store_path: StorePath, chunk_array: npt.NDArray[Any], chunk_spec: ArraySpec
self, store_path: StorePath, chunk_array: NDBuffer, chunk_spec: ArraySpec
) -> None:
if np.all(chunk_array == self.metadata.fill_value):
if chunk_array.all_equal(self.metadata.fill_value):
# chunks that only contain fill_value will be removed
await store_path.delete()
else:
Expand Down Expand Up @@ -377,14 +381,14 @@ async def _delete_key(key: str) -> None:
)

# Write new metadata
await (self.store_path / ZARR_JSON).set(new_metadata.to_bytes())
await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(new_metadata.to_bytes()))
return replace(self, metadata=new_metadata)

async def update_attributes(self, new_attributes: Dict[str, Any]) -> AsyncArray:
new_metadata = replace(self.metadata, attributes=new_attributes)

# Write new metadata
await (self.store_path / ZARR_JSON).set(new_metadata.to_bytes())
await (self.store_path / ZARR_JSON).set(Buffer.from_bytes(new_metadata.to_bytes()))
return replace(self, metadata=new_metadata)

def __repr__(self) -> str:
Expand Down
Loading
Loading