From f2dd9f12509b452da55725b121fa47da9ea1c91c Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 18 Jul 2023 17:55:05 -0400 Subject: [PATCH 1/7] Replace stackstac with odc-stac --- README.md | 8 ++++---- environment.yaml | 2 +- tests/test_core.py | 15 ++++++++++++++- xpystac/core.py | 9 +++++++-- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2ae85c3..b0f399f 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,13 @@ import xarray as xr catalog = pystac_client.Client.open( - "https://earth-search.aws.element84.com/v0" + "https://earth-search.aws.element84.com/v1", ) search = catalog.search( intersects=dict(type="Point", coordinates=[-105.78, 35.79]), - collections=['sentinel-s2-l2a-cogs'], - datetime="2020-04-01/2020-05-01", + collections=['sentinel-2-l2a'], + datetime="2022-04-01/2022-05-01", ) xr.open_dataset(search, engine="stac") @@ -71,7 +71,7 @@ pip install git+https://github.com/stac-utils/xpystac ## How it works When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that open call to the correct library. -Depending on the ``type`` of ``object`` that might be [stackstac](https://github.com/gjoseph92/stackstac) +Depending on the ``type`` of ``object`` that might be [odc-stac](https://github.com/opendatacube/odc-stac) or back to ``xarray.open_dataset`` itself but with the engine and other options pulled from the pystac object. 
## Prior Art diff --git a/environment.yaml b/environment.yaml index 2e84b2f..8cf65e6 100644 --- a/environment.yaml +++ b/environment.yaml @@ -11,11 +11,11 @@ dependencies: - adlfs - aiohttp - fsspec + - odc-stac - planetary-computer - pystac-client - requests - rioxarray - - stackstac - urllib3<2 # temporary pin https://github.com/stac-utils/pystac-client/issues/509 - zarr # testing diff --git a/tests/test_core.py b/tests/test_core.py index 803ca2d..76d24fb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,3 +1,4 @@ +import dask.array import pytest from xpystac.core import to_xarray @@ -9,10 +10,22 @@ def test_to_xarray_with_cog_asset(simple_cog): def test_to_xarray_with_pystac_client_search(simple_search): - ds = to_xarray(simple_search, assets=["blue", "green", "red"]) + ds = to_xarray(simple_search) assert ds +def test_to_xarray_returns_dask_backed_object(simple_search): + ds = to_xarray(simple_search) + assert isinstance(ds.blue.data, dask.array.Array) + assert ds.blue.data.npartitions > 1 + + +def test_to_xarray_with_pystac_client_search_passes_kwargs_through(simple_search): + ds = to_xarray(simple_search, assets=["red", "green", "blue"], chunks={}) + assert list(ds.data_vars) == ["red", "green", "blue"] + assert ds.blue.data.npartitions == 1 + + def test_to_xarray_with_drop_variables_raises(simple_search): with pytest.raises(KeyError, match="not implemented for pystac items"): to_xarray(simple_search, drop_variables=["blue"]) diff --git a/xpystac/core.py b/xpystac/core.py index a9d90f8..7aabd24 100644 --- a/xpystac/core.py +++ b/xpystac/core.py @@ -23,10 +23,15 @@ def _( drop_variables: Union[str, List[str], None] = None, **kwargs, ) -> xarray.Dataset: - stackstac = _import_optional_dependency("stackstac") + odc_stac = _import_optional_dependency("odc.stac") + default_kwargs: Mapping = {"chunks": {"x": 1024, "y": 1024}} if drop_variables is not None: raise KeyError("``drop_variables`` not implemented for pystac items") - return 
stackstac.stack(obj, **kwargs).to_dataset(dim="band", promote_attrs=True) + if isinstance(obj, pystac.Item): + items = [obj] + else: + items = [i for i in obj] + return odc_stac.load(items, **{**default_kwargs, **kwargs}) @to_xarray.register From 8e27edaaba78b91e3a3740b9975c9ef6553d3e94 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 19 Jul 2023 08:53:21 -0400 Subject: [PATCH 2/7] Use `bands` rather than `assets` --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 76d24fb..0292775 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -21,7 +21,7 @@ def test_to_xarray_returns_dask_backed_object(simple_search): def test_to_xarray_with_pystac_client_search_passes_kwargs_through(simple_search): - ds = to_xarray(simple_search, assets=["red", "green", "blue"], chunks={}) + ds = to_xarray(simple_search, bands=["red", "green", "blue"], chunks={}) assert list(ds.data_vars) == ["red", "green", "blue"] assert ds.blue.data.npartitions == 1 From fcdc7591e614d6218a9f4d72ceef82fef2d4a1d0 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 6 Sep 2023 18:13:15 -0400 Subject: [PATCH 3/7] Make the stacking_library configurable --- environment.yaml | 1 + tests/test_core.py | 7 +++++++ xpystac/core.py | 52 +++++++++++++++++++++++++++++++++++----------- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/environment.yaml b/environment.yaml index 8cf65e6..e7a1b1d 100644 --- a/environment.yaml +++ b/environment.yaml @@ -16,6 +16,7 @@ dependencies: - pystac-client - requests - rioxarray + - stackstac - urllib3<2 # temporary pin https://github.com/stac-utils/pystac-client/issues/509 - zarr # testing diff --git a/tests/test_core.py b/tests/test_core.py index 0292775..688c04b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,6 @@ import dask.array import pytest +import xarray as xr from xpystac.core import to_xarray @@ -26,6 +27,12 @@ def 
test_to_xarray_with_pystac_client_search_passes_kwargs_through(simple_search assert ds.blue.data.npartitions == 1 +@pytest.mark.parametrize("stacking_library", ["odc.stac", "stackstac"]) +def test_to_xarray_with_different_stacking_library(simple_search, stacking_library): + ds = to_xarray(simple_search, stacking_library=stacking_library) + assert isinstance(ds, xr.Dataset) + + def test_to_xarray_with_drop_variables_raises(simple_search): with pytest.raises(KeyError, match="not implemented for pystac items"): to_xarray(simple_search, drop_variables=["blue"]) diff --git a/xpystac/core.py b/xpystac/core.py index 7aabd24..aacdf5c 100644 --- a/xpystac/core.py +++ b/xpystac/core.py @@ -1,5 +1,5 @@ import functools -from typing import List, Mapping, Union +from typing import List, Literal, Mapping, Union import pystac import xarray @@ -8,11 +8,20 @@ @functools.singledispatch -def to_xarray(obj, **kwargs) -> xarray.Dataset: - """Given a pystac object return an xarray dataset""" +def to_xarray( + obj, + stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None, + **kwargs, +) -> xarray.Dataset: + """Given a pystac object return an xarray dataset + + When stacking multiple items, an optional ``stacking_library`` argument + is accepted. It defaults to ``odc.stac`` if available and otherwise ``stackstac``. 
+ Control the behavior by setting ``stacking_library`` + """ if _is_item_search(obj): item_collection = obj.item_collection() - return to_xarray(item_collection, **kwargs) + return to_xarray(item_collection, stacking_library=stacking_library, **kwargs) raise TypeError @@ -21,21 +30,40 @@ def to_xarray(obj, **kwargs) -> xarray.Dataset: def _( obj: Union[pystac.Item, pystac.ItemCollection], drop_variables: Union[str, List[str], None] = None, + stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None, **kwargs, ) -> xarray.Dataset: - odc_stac = _import_optional_dependency("odc.stac") - default_kwargs: Mapping = {"chunks": {"x": 1024, "y": 1024}} if drop_variables is not None: raise KeyError("``drop_variables`` not implemented for pystac items") - if isinstance(obj, pystac.Item): - items = [obj] - else: - items = [i for i in obj] - return odc_stac.load(items, **{**default_kwargs, **kwargs}) + + if stacking_library is None: + try: + _import_optional_dependency("odc.stac") + stacking_library = "odc.stac" + except ImportError: + _import_optional_dependency("stackstac") + stacking_library = "stackstac" + elif stacking_library not in ["odc.stac", "stackstac"]: + raise ValueError(f"{stacking_library=} is not a valid option") + + if stacking_library == "odc.stac": + odc_stac = _import_optional_dependency("odc.stac") + if isinstance(obj, pystac.Item): + items = [obj] + else: + items = [i for i in obj] + return odc_stac.load(items, **{"chunks": {"x": 1024, "y": 1024}, **kwargs}) + elif stacking_library == "stackstac": + stackstac = _import_optional_dependency("stackstac") + return stackstac.stack(obj, **kwargs).to_dataset(dim="band", promote_attrs=True) @to_xarray.register -def _(obj: pystac.Asset, **kwargs) -> xarray.Dataset: +def _( + obj: pystac.Asset, + stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None, + **kwargs, +) -> xarray.Dataset: default_kwargs: Mapping = {"chunks": {}} open_kwargs = obj.extra_fields.get("xarray:open_kwargs", {}) 
From b7c43b6140d47416d25a979730016d22ba39f53d Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 11 Sep 2023 10:00:29 -0400 Subject: [PATCH 4/7] Refer to both stackstac and odc-stac in README --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b0f399f..dbb94c1 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,9 @@ pip install git+https://github.com/stac-utils/xpystac ## How it works -When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that open call to the correct library. -Depending on the ``type`` of ``object`` that might be [odc-stac](https://github.com/opendatacube/odc-stac) +When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that `open` call to the correct library. +Depending on the ``type`` of ``object`` that might be a stacking library (either +[odc-stac](https://github.com/opendatacube/odc-stac) or [stackstac](https://github.com/gjoseph92/stackstac)) or back to ``xarray.open_dataset`` itself but with the engine and other options pulled from the pystac object. ## Prior Art From cf96916d01003ed411a0f81b266034ab95c1c053 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 11 Sep 2023 10:18:17 -0400 Subject: [PATCH 5/7] Update action --- .github/workflows/tests.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index edfbdc7..0d5c78c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,12 +35,11 @@ jobs: uses: actions/checkout@v3.3.0 - name: setup micromamba - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@main with: environment-file: ${{ matrix.environment-file }} micromamba-version: "latest" - extra-specs: python=${{ matrix.python-version }} - channel-priority: "flexible" + create-args: python=${{ matrix.python-version }} - name: install xpystac run: pip install . 
From be761dbe6b889e4c84e500ff7943619aefd663d3 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 11 Sep 2023 10:24:38 -0400 Subject: [PATCH 6/7] Add libarchive as explicit dep --- environment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yaml b/environment.yaml index e7a1b1d..d42b2dd 100644 --- a/environment.yaml +++ b/environment.yaml @@ -11,6 +11,7 @@ dependencies: - adlfs - aiohttp - fsspec + - libarchive - odc-stac - planetary-computer - pystac-client From 3b15fe917e42324ae8b47f1c31aa5b4486d150f1 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 20 Sep 2023 16:42:02 -0400 Subject: [PATCH 7/7] Bump env --- environment.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment.yaml b/environment.yaml index d42b2dd..c32d731 100644 --- a/environment.yaml +++ b/environment.yaml @@ -1,4 +1,4 @@ -name: xpystac-dev +name: xpystac-broken channels: - conda-forge - nodefaults @@ -11,7 +11,6 @@ dependencies: - adlfs - aiohttp - fsspec - - libarchive - odc-stac - planetary-computer - pystac-client