Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Zarr-Python 3 #1082

Merged
merged 6 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/titiler/xarray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ dependencies = [

[project.optional-dependencies]
full = [
"zarr>=2,<3",
"zarr",
"h5netcdf",
"fsspec",
"s3fs",
"aiohttp",
"gcsfs",
]
minimal = [
"zarr>=2,<3",
"zarr",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this work for zarr<3? Should that be specified?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I accounted for both the new and old syntax in:

    if module_available("zarr", minversion="3.0"):
        if protocol == "file":
            store = zarr.storage.LocalStore(parsed.path, read_only=True)
        else:
            fs = fsspec.filesystem(protocol, storage_options={"asynchronous": True})
            store = zarr.storage.FsspecStore(fs, path=src_path, read_only=True)
    else:
        store = fsspec.filesystem(protocol).get_mapper(src_path)

and tested locally for zarr<3.

"h5netcdf",
"fsspec",
]
Expand All @@ -64,7 +64,7 @@ test = [
"pytest-cov",
"pytest-asyncio",
"httpx",
"zarr>=2,<3",
"zarr",
"h5netcdf",
"fsspec",
]
Expand Down
81 changes: 14 additions & 67 deletions src/titiler/xarray/titiler/xarray/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from morecantile import TileMatrixSet
from rio_tiler.constants import WEB_MERCATOR_TMS
from rio_tiler.io.xarray import XarrayReader
from xarray.namedarray.utils import module_available


def xarray_open_dataset( # noqa: C901
Expand All @@ -29,21 +30,6 @@ def xarray_open_dataset( # noqa: C901
"""
import fsspec # noqa

try:
import gcsfs
except ImportError: # pragma: nocover
gcsfs = None # type: ignore

try:
import s3fs
except ImportError: # pragma: nocover
s3fs = None # type: ignore

try:
import aiohttp
except ImportError: # pragma: nocover
aiohttp = None # type: ignore

try:
import h5netcdf
except ImportError: # pragma: nocover
Expand All @@ -66,55 +52,8 @@ def xarray_open_dataset( # noqa: C901

else:
assert zarr is not None, "'zarr' must be installed to read Zarr dataset"

xr_engine = "zarr"

if protocol in ["", "file"]:
filesystem = fsspec.filesystem(protocol) # type: ignore
file_handler = (
filesystem.open(src_path)
if xr_engine == "h5netcdf"
else filesystem.get_mapper(src_path)
)

elif protocol == "s3":
assert (
s3fs is not None
), "'aiohttp' must be installed to read dataset stored online"

s3_filesystem = s3fs.S3FileSystem()
file_handler = (
s3_filesystem.open(src_path)
if xr_engine == "h5netcdf"
else s3fs.S3Map(root=src_path, s3=s3_filesystem)
)

elif protocol == "gs":
assert (
gcsfs is not None
), "'gcsfs' must be installed to read dataset stored in Google Cloud Storage"

gcs_filesystem = gcsfs.GCSFileSystem()
file_handler = (
gcs_filesystem.open(src_path)
if xr_engine == "h5netcdf"
else gcs_filesystem.get_mapper(root=src_path)
)

elif protocol in ["http", "https"]:
assert (
aiohttp is not None
), "'aiohttp' must be installed to read dataset stored online"

filesystem = fsspec.filesystem(protocol) # type: ignore
file_handler = (
filesystem.open(src_path)
if xr_engine == "h5netcdf"
else filesystem.get_mapper(src_path)
)

else:
raise ValueError(f"Unsupported protocol: {protocol}, for {src_path}")
maxrjones marked this conversation as resolved.
Show resolved Hide resolved
_zarr_v3 = module_available("zarr", minversion="3.0")
maxrjones marked this conversation as resolved.
Show resolved Hide resolved

# Arguments for xarray.open_dataset
# Default args
Expand All @@ -135,13 +74,21 @@ def xarray_open_dataset( # noqa: C901
"lock": False,
}
)

ds = xarray.open_dataset(file_handler, **xr_open_args)
fs = fsspec.filesystem(protocol)
ds = xarray.open_dataset(fs.open(src_path), **xr_open_args)

# Fallback to Zarr
else:
ds = xarray.open_zarr(file_handler, **xr_open_args)

if _zarr_v3:
if protocol == "file":
store = zarr.storage.LocalStore(parsed.path, read_only=True)
else:
fs = fsspec.filesystem(protocol, storage_options={"asynchronous": True})
store = zarr.storage.FsspecStore(fs, path=src_path, read_only=True)
ds = xarray.open_zarr(store, **xr_open_args)
else:
fs = fsspec.filesystem(protocol)
ds = xarray.open_zarr(fs.get_mapper(src_path), **xr_open_args)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the bulk of the changes: Zarr no longer accepts fsspec's mutable-mapping wrappers and now expects asynchronous file systems.

maxrjones marked this conversation as resolved.
Show resolved Hide resolved
return ds


Expand Down
Loading