diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba1c90707..05f54f40f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Changed
 
 - Made item pickles smaller by changing how nested links are stored([#1285](https://github.com/stac-utils/pystac/pull/1285))
+- Add APILayoutStrategy ([#1294](https://github.com/stac-utils/pystac/pull/1294))
 - Allow setting a default layout strategy for Catalog ([#1295](https://github.com/stac-utils/pystac/pull/1295))
 
 ### Fixed
diff --git a/pystac/catalog.py b/pystac/catalog.py
index f4e38ab54..6fd5fa77a 100644
--- a/pystac/catalog.py
+++ b/pystac/catalog.py
@@ -288,7 +288,7 @@ def add_child(
         # set self link
         self_href = self.get_self_href()
         if self_href and set_parent:
-            child_href = strategy.get_href(child, os.path.dirname(self_href))
+            child_href = strategy.get_href(child, self_href)
             child.set_self_href(child_href)
 
         child_link = Link.child(child, title=title)
@@ -359,7 +359,7 @@ def add_item(
         # set self link
         self_href = self.get_self_href()
         if self_href and set_parent:
-            item_href = strategy.get_href(item, os.path.dirname(self_href))
+            item_href = strategy.get_href(item, self_href)
             item.set_self_href(item_href)
 
         item_link = Link.item(item, title=title)
@@ -771,7 +771,7 @@ def normalize_hrefs(
             root_href = make_absolute_href(root_href, os.getcwd(), start_is_dir=True)
 
         def process_item(
-            item: Item, _root_href: str, parent: Catalog | None
+            item: Item, _root_href: str, is_root: bool, parent: Catalog | None
         ) -> Callable[[], None] | None:
             if not skip_unresolved:
                 item.resolve_links()
@@ -781,7 +781,7 @@ def process_item(
             if parent is not None and item.get_parent() != parent:
                 return None
 
-            new_self_href = _strategy.get_href(item, _root_href)
+            new_self_href = _strategy.get_href(item, _root_href, is_root)
 
             def fn() -> None:
                 item.set_self_href(new_self_href)
@@ -805,7 +805,7 @@ def process_catalog(
                 return setter_funcs
 
             new_self_href = _strategy.get_href(cat, _root_href, is_root)
-            new_root = os.path.dirname(new_self_href)
+            new_root = new_self_href
 
             for link in cat.get_links():
                 if skip_unresolved and not link.is_resolved():
@@ -813,7 +813,7 @@ def process_catalog(
                 elif link.rel == pystac.RelType.ITEM:
                     link.resolve_stac_object(root=self.get_root())
                     item_fn = process_item(
-                        cast(pystac.Item, link.target), new_root, cat
+                        cast(pystac.Item, link.target), new_root, is_root, cat
                     )
                     if item_fn is not None:
                         setter_funcs.append(item_fn)
diff --git a/pystac/layout.py b/pystac/layout.py
index 08e889872..baafea722 100644
--- a/pystac/layout.py
+++ b/pystac/layout.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import os
 import posixpath
 import warnings
 from abc import ABC, abstractmethod
@@ -8,6 +9,7 @@
 from typing import TYPE_CHECKING, Any, Callable
 
 import pystac
+from pystac.utils import is_file_path
 
 if TYPE_CHECKING:
     from pystac.catalog import Catalog
@@ -258,6 +260,9 @@ class HrefLayoutStrategy(ABC):
     def get_href(
         self, stac_object: STACObject, parent_dir: str, is_root: bool = False
     ) -> str:
+        if is_file_path(parent_dir):
+            parent_dir = os.path.dirname(parent_dir)
+
         if isinstance(stac_object, pystac.Item):
             return self.get_item_href(stac_object, parent_dir)
         elif isinstance(stac_object, pystac.Collection):
@@ -529,3 +534,41 @@ def get_item_href(self, item: Item, parent_dir: str) -> str:
             )
         else:
             return href
+
+
+class APILayoutStrategy(HrefLayoutStrategy):
+    """Layout strategy that represents the STAC API endpoint layout described
+    in the `STAC API Specifications
+    <https://github.com/radiantearth/stac-api-spec>`_
+
+    The URL of the root catalog will be the base URL of the API.
+    Other catalogs will be listed underneath their parent catalog
+    at ``./${id}``.
+    Collections will be found underneath the root or their parent
+    catalog at ``./collections/${id}``. Collections cannot be the root themselves.
+    Items will be found underneath their collections at
+    ``./collections/${collection}/items/${id}``.
+    """
+
+    def get_catalog_href(self, cat: Catalog, parent_dir: str, is_root: bool) -> str:
+        if is_root:
+            cat_href = parent_dir
+        else:
+            cat_href = posixpath.join(parent_dir, f"{cat.id}")
+
+        return cat_href
+
+    def get_collection_href(
+        self, col: Collection, parent_dir: str, is_root: bool
+    ) -> str:
+        if is_root:
+            raise ValueError("Collections cannot be root")
+
+        col_root = posixpath.join(parent_dir, "collections")
+
+        return posixpath.join(col_root, f"{col.id}")
+
+    def get_item_href(self, item: Item, parent_dir: str) -> str:
+        item_root = posixpath.join(parent_dir, "items")
+
+        return posixpath.join(item_root, f"{item.id}")
diff --git a/pystac/utils.py b/pystac/utils.py
index 1cf100118..c60248663 100644
--- a/pystac/utils.py
+++ b/pystac/utils.py
@@ -559,3 +559,24 @@ def get_required(option: T | None, obj: str | Any, prop: str) -> T:
     if option is None:
         raise RequiredPropertyMissing(obj, prop)
     return option
+
+
+def is_file_path(href: str) -> bool:
+    """Checks if an HREF resembles a file path.
+
+    This method checks if the given HREF resembles a file path.
+    It checks if the path ends with any kind of file extension
+    and if true, assumes it is a file.
+    Unlike ``os.path.isfile()`` it does NOT check the actual file.
+
+    Caution: There are cases for which this method may return wrong results!
+
+    Args:
+        href (str) : The HREF to consider.
+
+    Returns:
+        bool: ``True`` if the given HREF resembles a file path,
+        ``False`` if it does not.
+    """
+    parsed = urlparse(href)
+    return bool(os.path.splitext(parsed.path)[1])
diff --git a/tests/test_layout.py b/tests/test_layout.py
index fee31c042..7559f96f2 100644
--- a/tests/test_layout.py
+++ b/tests/test_layout.py
@@ -3,9 +3,12 @@
 from datetime import datetime, timedelta
 from typing import Callable
 
+import pytest
+
 import pystac
 from pystac.collection import Collection
 from pystac.layout import (
+    APILayoutStrategy,
     AsIsLayoutStrategy,
     BestPracticesLayoutStrategy,
     CustomLayoutStrategy,
@@ -442,3 +445,88 @@ def test_item(self) -> None:
         item.set_self_href("/an/href")
         href = self.strategy.get_href(item, parent_dir="http://example.com")
         self.assertEqual(href, "/an/href")
+
+
+class APILayoutStrategyTest(unittest.TestCase):
+    def setUp(self) -> None:
+        self.strategy = APILayoutStrategy()
+
+    def test_produces_layout_for_root_catalog(self) -> None:
+        cat = pystac.Catalog(id="test", description="test desc")
+        href = self.strategy.get_href(
+            cat, parent_dir="http://example.com", is_root=True
+        )
+        self.assertEqual(href, "http://example.com")
+
+    def test_produces_layout_for_child_catalog(self) -> None:
+        cat = pystac.Catalog(id="test", description="test desc")
+        href = self.strategy.get_href(cat, parent_dir="http://example.com")
+        self.assertEqual(href, "http://example.com/test")
+
+    def test_cannot_produce_layout_for_root_collection(self) -> None:
+        collection = TestCases.case_8()
+        with pytest.raises(ValueError):
+            self.strategy.get_href(
+                collection, parent_dir="http://example.com", is_root=True
+            )
+
+    def test_produces_layout_for_child_collection(self) -> None:
+        collection = TestCases.case_8()
+        href = self.strategy.get_href(collection, parent_dir="http://example.com")
+        self.assertEqual(href, f"http://example.com/collections/{collection.id}")
+
+    def test_produces_layout_for_item(self) -> None:
+        collection = TestCases.case_8()
+        col_href = self.strategy.get_href(collection, parent_dir="http://example.com")
+        item = next(collection.get_items(recursive=True))
+        href = self.strategy.get_href(item, parent_dir=col_href)
+        expected = f"http://example.com/collections/{collection.id}/items/{item.id}"
+        self.assertEqual(href, expected)
+
+    def test_produces_normalized_layout(self) -> None:
+        cat = pystac.Catalog(id="test_catalog", description="Test Catalog")
+        col = pystac.Collection(
+            id="test_collection",
+            description="Test Collection",
+            extent=pystac.Extent(
+                spatial=pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]),
+                temporal=pystac.TemporalExtent(
+                    [[datetime(2023, 1, 1), datetime(2023, 12, 31)]]
+                ),
+            ),
+        )
+        item = pystac.Item(
+            id="test_item",
+            geometry={
+                "type": "Polygon",
+                "coordinates": [
+                    [
+                        [180.0, -90.0],
+                        [180.0, 90.0],
+                        [-180.0, 90.0],
+                        [-180.0, -90.0],
+                        [180.0, -90.0],
+                    ]
+                ],
+            },
+            bbox=[-180, -90, 180, 90],
+            datetime=datetime(2023, 1, 1),
+            properties={},
+            assets={
+                "data": pystac.Asset(
+                    href="http://example.com/assets/data.tif",
+                    roles=["data"],
+                    title="DATA",
+                )
+            },
+        )
+        cat.add_child(col)
+        col.add_item(item)
+        cat.normalize_hrefs("http://example.com", strategy=self.strategy)
+
+        assert cat.self_href == "http://example.com"
+        assert col.self_href == "http://example.com/collections/test_collection"
+        assert (
+            item.self_href
+            == "http://example.com/collections/test_collection/items/test_item"
+        )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d43139044..bda3d3ece 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -12,6 +12,7 @@
 from pystac.utils import (
     JoinType,
     is_absolute_href,
+    is_file_path,
     join_path_or_url,
     make_absolute_href,
     make_relative_href,
@@ -445,3 +446,31 @@ def test_join_path_or_url() -> None:
     with pytest.warns(DeprecationWarning):
         joined_url = join_path_or_url(JoinType.URL, *url_args)
     assert joined_url == "https://some/page/file.html"
+
+
+@pytest.mark.parametrize(
+    "href,expected",
+    [
+        ("path/to/file.txt", True),
+        ("path/to/nofile", False),
+        ("./path/to/file.txt", True),
+        ("./path/to/nofile", False),
+        ("./path/to/", False),
+        ("/path/to/file.txt", True),
+        ("/path/to/nofile", False),
+        ("/path", False),
+        ("/", False),
+        ("D:/path/to/file.txt", True),
+        ("D:/path/to/nofile", False),
+        ("D:\\path\\to\\file.txt", True),
+        ("D:\\path\\to\\nofile", False),
+        ("D:", False),
+        ("D:\\", False),
+        ("https://example.com/absolutepath/to/file.txt", True),
+        ("https://example.com/absolutepath/to/nofile", False),
+        ("https://example.com", False),
+        ("https://example.com/", False),
+    ],
+)
+def test_is_file_path(href: str, expected: bool) -> None:
+    assert is_file_path(href) == expected