Ome ngff pattonw (#361)
Add OME metadata support

Needs some testing. OME-Zarr support across the ecosystem is not very robust, and I couldn't find a single library that covered all of our needs:

- There is the official ome-zarr-py package: https://github.com/ome/ome-zarr-py. I had difficulty extracting the metadata from arrays, and it seems you end up either writing your own internal models for the metadata or operating directly at the JSON level. See this example.
- Davis has a metadata package that covers a lot of what I want on the metadata side, but it doesn't seem to come with much actual data support for reading and writing arrays with the given metadata.
- Davis also seems to have a new package that looks very promising but is very early in development.
- Ziwen has put a lot of effort into this library, which is what I ended up going with, since it comes closest to covering both the metadata model and tying it to actual data, with something like an open_ome_zarr convenience function. There was only a small piece missing that I needed, so I opened a pull request, which is currently in progress.

There seem to be more implementations, but these are the ones I looked into.
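
For illustration, a minimal sketch of how the new option is meant to be used. The import location, container path, and dataset name here are assumptions for the example; see the diff below for the actual implementation.

```python
# Illustrative sketch: open one scale level of an OME-Zarr container via the new
# ome_metadata flag. Paths, names, and the import location are assumptions.
from upath import UPath as Path
from dacapo.experiments.datasplits.datasets.arrays import ZarrArrayConfig  # assumed export path

config = ZarrArrayConfig(
    name="raw",
    file_name=Path("/data/example.zarr"),  # OME-Zarr container (illustrative)
    dataset="multiscale_dataset/s1",       # multiscales group + scale-level array
    ome_metadata=True,                     # read voxel size / offset from OME metadata
)
array = config.array(mode="r")             # dispatches to funlib.persistence.open_ome_ds
```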
mzouink authored Jan 23, 2025
2 parents 12c14c0 + 93e0341 commit 8c5ae81
Showing 7 changed files with 116 additions and 12 deletions.
@@ -56,9 +56,11 @@ def array(self, mode="r") -> Array:

        def group_array(data):
            groups = [
-                da.isin(data, group_ids)
-                if len(group_ids) > 0
-                else data != self.background
+                (
+                    da.isin(data, group_ids)
+                    if len(group_ids) > 0
+                    else data != self.background
+                )
                for _, group_ids in self.groupings
            ]
            out = da.stack(groups, axis=0)
12 changes: 10 additions & 2 deletions dacapo/experiments/datasplits/datasets/arrays/zarr_array_config.py
@@ -3,7 +3,7 @@
from .array_config import ArrayConfig

from funlib.geometry import Coordinate
-from funlib.persistence import open_ds
+from funlib.persistence import open_ds, open_ome_ds

from upath import UPath as Path

@@ -56,9 +56,17 @@ class ZarrArrayConfig(ArrayConfig):
    mode: Optional[str] = attr.ib(
        default="a", metadata={"help_text": "The access mode!"}
    )
+    ome_metadata: bool = attr.ib(
+        default=False, metadata={"help_text": "Whether to expect OME metadata"}
+    )

    def array(self, mode="r"):
-        return open_ds(f"{self.file_name}/{self.dataset}", mode=mode)
+        if self.ome_metadata:
+            name = self.dataset.split("/")[-1]
+            dataset = self.dataset.replace(f"/{name}", "")
+            return open_ome_ds(self.file_name / dataset, name=name, mode=mode)
+        else:
+            return open_ds(self.file_name / self.dataset, mode=mode)

    def verify(self) -> Tuple[bool, str]:
        """
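The subtle part of the new `array()` method is how it splits the configured dataset path: the last component becomes the array name inside the OME multiscales group, and the remainder is the group path handed to `open_ome_ds`. A small walk-through of that split, with illustrative values:

```python
# Illustrative walk-through of the path handling added in ZarrArrayConfig.array().
from upath import UPath as Path

file_name = Path("/data/example.zarr")     # container path (illustrative)
dataset = "multiscale_dataset/s1"          # value stored in the config

name = dataset.split("/")[-1]              # "s1" -> the scale-level array
group = dataset.replace(f"/{name}", "")    # "multiscale_dataset" -> the OME group
# With ome_metadata=True the config calls:
#     open_ome_ds(file_name / group, name=name, mode="r")
# otherwise it falls back to:
#     open_ds(file_name / dataset, mode="r")
```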
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -47,7 +47,6 @@ dependencies = [
"funlib.geometry>=0.2",
"mwatershed>=0.5.2",
"cellmap-models",
"funlib.persistence>=0.5.3",
"gunpowder>=1.4",
"lsds",
"xarray",
@@ -59,7 +58,7 @@ dependencies = [
"upath",
"boto3",
"matplotlib",
"xarray-multiscale",
"funlib.persistence @ git+https://github.com/funkelab/funlib.persistence.git@ome-ngff"
]

# extras
2 changes: 1 addition & 1 deletion tests/fixtures/__init__.py
@@ -4,7 +4,7 @@
    unet_architecture,
    unet_3d_architecture,
)
-from .arrays import dummy_array, zarr_array, cellmap_array
+from .arrays import dummy_array, zarr_array, cellmap_array, multiscale_zarr
from .datasplits import (
    dummy_datasplit,
    twelve_class_datasplit,
70 changes: 67 additions & 3 deletions tests/fixtures/arrays.py
@@ -6,7 +6,7 @@

import zarr
import numpy as np
-
+from numcodecs import Zstd
import pytest


@@ -27,7 +27,7 @@ def zarr_array(tmp_path):
        zarr_array_config.dataset, data=np.zeros((100, 50, 25), dtype=np.float32)
    )
    dataset.attrs["offset"] = (12, 12, 12)
-    dataset.attrs["resolution"] = (1, 2, 4)
+    dataset.attrs["voxel_size"] = (1, 2, 4)
    dataset.attrs["axis_names"] = ["z", "y", "x"]
    yield zarr_array_config

@@ -45,7 +45,7 @@ def cellmap_array(tmp_path):
        data=np.arange(0, 100, dtype=np.uint8).reshape(10, 5, 2),
    )
    dataset.attrs["offset"] = (12, 12, 12)
-    dataset.attrs["resolution"] = (1, 2, 4)
+    dataset.attrs["voxel_size"] = (1, 2, 4)
    dataset.attrs["axis_names"] = ["z", "y", "x"]

    cellmap_array_config = BinarizeArrayConfig(
@@ -59,3 +59,67 @@
    )

    yield cellmap_array_config


@pytest.fixture()
def multiscale_zarr(tmp_path):
    zarr_metadata = {
        "multiscales": [
            {
                "axes": [
                    {"name": "z", "type": "space", "unit": "nanometer"},
                    {"name": "y", "type": "space", "unit": "nanometer"},
                    {"name": "x", "type": "space", "unit": "nanometer"},
                ],
                "coordinateTransformations": [],
                "datasets": [
                    {
                        "coordinateTransformations": [
                            {"scale": [4.2, 7.4, 5.6], "type": "scale"},
                            {"translation": [6.0, 10.0, 2.0], "type": "translation"},
                        ],
                        "path": "s0",
                    },
                    {
                        "coordinateTransformations": [
                            {"type": "scale", "scale": [1.0, 2.0, 4.0]},
                            {"type": "translation", "translation": [12.0, 12.0, 12.0]},
                        ],
                        "path": "s1",
                    },
                ],
                "name": "multiscale_dataset",
                "version": "0.4",
            }
        ],
        "omero": {
            "id": 1,
            "name": "test_image",
            "channels": [],
        },
    }
    ome_zarr_array_config = ZarrArrayConfig(
        name="ome_zarr_array",
        file_name=tmp_path / "ome_zarr_array.zarr",
        dataset="multiscale_dataset/s1",
        ome_metadata=True,
    )

    store = zarr.DirectoryStore(ome_zarr_array_config.file_name)
    multiscale_group = zarr.group(
        store=store, path="multiscale_dataset", overwrite=True
    )

    for level in [0, 1]:
        scaling = pow(2, level)
        multiscale_group.require_dataset(
            name=f"s{level}",
            shape=(100 / scaling, 80 / scaling, 60 / scaling),
            chunks=10,
            dtype=np.float32,
            compressor=Zstd(level=6),
        )

    multiscale_group.attrs.update(zarr_metadata)

    yield ome_zarr_array_config
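
For reference, the `s1` coordinate transformations in the fixture above are what the test below expects to come back as Funlib-style metadata. A rough sketch of that correspondence, assuming scale maps to voxel size and translation to offset (matching the assertions in test_array):

```python
# Sketch of how the OME-NGFF "s1" entry above corresponds to the attributes asserted below.
s1_scale = [1.0, 2.0, 4.0]            # per-axis voxel size along (z, y, x)
s1_translation = [12.0, 12.0, 12.0]   # world-space offset of the array origin

voxel_size = tuple(int(v) for v in s1_scale)      # -> (1, 2, 4)
offset = tuple(int(t) for t in s1_translation)    # -> (12, 12, 12)
axis_names = ["z", "y", "x"]                      # from the "axes" entries
```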
28 changes: 28 additions & 0 deletions tests/operations/test_array.py
@@ -0,0 +1,28 @@
import os
from ..fixtures import *

import pytest
from pytest_lazy_fixtures import lf

import logging

logging.basicConfig(level=logging.INFO)


@pytest.mark.parametrize(
    "array_config",
    [
        lf("zarr_array"),
        lf("multiscale_zarr"),
    ],
)
def test_array(array_config):
    array = array_config.array()

    assert array.offset == (12, 12, 12), f"offset is not correct, expected (12, 12, 12), got {array.offset}"
    assert array.voxel_size == (1, 2, 4), f"voxel_size is not correct, expected (1, 2, 4), got {array.voxel_size}"
    assert array.axis_names == ["z", "y", "x"], f"axis names are not correct, expected ['z', 'y', 'x'], got {array.axis_names}"

    # offset = array.attrs["offset"]
    # resolution = array.attrs["resolution"]

5 changes: 4 additions & 1 deletion tests/operations/test_mini.py
@@ -10,7 +10,7 @@
from dacapo.experiments import Run
from dacapo.train import train_run
from dacapo.validate import validate_run
-
+from dacapo.compute_context import create_compute_context
import zarr

import pytest
@@ -75,6 +75,9 @@ def test_mini(
        num_iterations=1,
    )
    run = Run(run_config)
+    compute_context = create_compute_context()
+    device = compute_context.device
+    run.model.to(device)

    if func == "train":
        train_run(run)
