Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add API layout strategy #1294

Merged
merged 10 commits into from
Jan 10, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Changed

- Made item pickles smaller by changing how nested links are stored ([#1285](https://github.com/stac-utils/pystac/pull/1285))
- Add APILayoutStrategy ([#1294](https://github.com/stac-utils/pystac/pull/1294))
- Allow setting a default layout strategy for Catalog ([#1295](https://github.com/stac-utils/pystac/pull/1295))

### Fixed
Expand Down
12 changes: 6 additions & 6 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def add_child(
# set self link
self_href = self.get_self_href()
if self_href and set_parent:
child_href = strategy.get_href(child, os.path.dirname(self_href))
child_href = strategy.get_href(child, self_href)
child.set_self_href(child_href)

child_link = Link.child(child, title=title)
Expand Down Expand Up @@ -359,7 +359,7 @@ def add_item(
# set self link
self_href = self.get_self_href()
if self_href and set_parent:
item_href = strategy.get_href(item, os.path.dirname(self_href))
item_href = strategy.get_href(item, self_href)
item.set_self_href(item_href)

item_link = Link.item(item, title=title)
Expand Down Expand Up @@ -771,7 +771,7 @@ def normalize_hrefs(
root_href = make_absolute_href(root_href, os.getcwd(), start_is_dir=True)

def process_item(
item: Item, _root_href: str, parent: Catalog | None
item: Item, _root_href: str, is_root: bool, parent: Catalog | None
) -> Callable[[], None] | None:
if not skip_unresolved:
item.resolve_links()
Expand All @@ -781,7 +781,7 @@ def process_item(
if parent is not None and item.get_parent() != parent:
return None

new_self_href = _strategy.get_href(item, _root_href)
new_self_href = _strategy.get_href(item, _root_href, is_root)

def fn() -> None:
item.set_self_href(new_self_href)
Expand All @@ -805,15 +805,15 @@ def process_catalog(
return setter_funcs

new_self_href = _strategy.get_href(cat, _root_href, is_root)
new_root = os.path.dirname(new_self_href)
new_root = new_self_href

for link in cat.get_links():
if skip_unresolved and not link.is_resolved():
continue
elif link.rel == pystac.RelType.ITEM:
link.resolve_stac_object(root=self.get_root())
item_fn = process_item(
cast(pystac.Item, link.target), new_root, cat
cast(pystac.Item, link.target), new_root, is_root, cat
)
if item_fn is not None:
setter_funcs.append(item_fn)
Expand Down
43 changes: 43 additions & 0 deletions pystac/layout.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
import posixpath
import warnings
from abc import ABC, abstractmethod
Expand All @@ -8,6 +9,7 @@
from typing import TYPE_CHECKING, Any, Callable

import pystac
from pystac.utils import is_file_path

if TYPE_CHECKING:
from pystac.catalog import Catalog
Expand Down Expand Up @@ -258,6 +260,9 @@ class HrefLayoutStrategy(ABC):
def get_href(
self, stac_object: STACObject, parent_dir: str, is_root: bool = False
) -> str:
if is_file_path(parent_dir):
parent_dir = os.path.dirname(parent_dir)

if isinstance(stac_object, pystac.Item):
return self.get_item_href(stac_object, parent_dir)
elif isinstance(stac_object, pystac.Collection):
Expand Down Expand Up @@ -529,3 +534,41 @@ def get_item_href(self, item: Item, parent_dir: str) -> str:
)
else:
return href


class APILayoutStrategy(HrefLayoutStrategy):
    """Layout strategy that represents the STAC API endpoint layout described
    in the `STAC API Specifications
    <https://github.com/radiantearth/stac-api-spec/blob/v1.0.0/overview.md#endpoints>`_

    The URL of the root catalog will be the base URL of the API.
    Other catalogs will be listed underneath their parent catalog
    at ``./${id}``.
    Collections will be found underneath the root or their parent
    catalog at ``./collections/${id}``. A collection cannot be the root itself.
    Items will be found underneath their collections at
    ``./collections/${collection}/items/${id}``.
    """

    def get_catalog_href(self, cat: Catalog, parent_dir: str, is_root: bool) -> str:
        """Return the HREF of a catalog.

        Args:
            cat : The catalog to generate an HREF for.
            parent_dir : The HREF the catalog is placed underneath.
            is_root : Whether ``cat`` is the root catalog.

        Returns:
            str: ``parent_dir`` unchanged for the root catalog, otherwise
            ``parent_dir`` joined with the catalog ID.
        """
        # The root catalog lives at the base URL itself, not in a sub-path.
        if is_root:
            return parent_dir

        return posixpath.join(parent_dir, f"{cat.id}")

    def get_collection_href(
        self, col: Collection, parent_dir: str, is_root: bool
    ) -> str:
        """Return the HREF of a collection.

        Args:
            col : The collection to generate an HREF for.
            parent_dir : The HREF the collection is placed underneath.
            is_root : Whether ``col`` is the root of the tree.

        Returns:
            str: ``parent_dir`` joined with ``collections`` and the collection ID.

        Raises:
            ValueError: If ``is_root`` is true; this layout has no HREF for
                a collection acting as the root.
        """
        if is_root:
            raise ValueError("Collections cannot be root")

        return posixpath.join(parent_dir, "collections", f"{col.id}")

    def get_item_href(self, item: Item, parent_dir: str) -> str:
        """Return the HREF of an item.

        Args:
            item : The item to generate an HREF for.
            parent_dir : The HREF the item is placed underneath.

        Returns:
            str: ``parent_dir`` joined with ``items`` and the item ID.
        """
        return posixpath.join(parent_dir, "items", f"{item.id}")
21 changes: 21 additions & 0 deletions pystac/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,3 +559,24 @@ def get_required(option: T | None, obj: str | Any, prop: str) -> T:
if option is None:
raise RequiredPropertyMissing(obj, prop)
return option


def is_file_path(href: str) -> bool:
    """Checks if an HREF resembles a file path.

    The check is purely lexical: the HREF is parsed as a URL and it is
    assumed to be a file if its path component ends with any kind of
    file extension.
    Unlike `os.path.isfile()` it does NOT check the actual file.

    Caution: There are cases for which this function may return wrong results!
    For example, a directory whose name contains a dot (``/data/v1.0``) is
    reported as a file, while a file without an extension (``/data/README``)
    is not.

    Args:
        href (str) : The HREF to consider.

    Returns:
        bool: ``True`` if the given HREF resembles a file path,
        ``False`` if it does not.
    """
    # Look at the URL's path component only, so that a dot in the host name
    # (``https://example.com``) or in a query string is not mistaken for a
    # file extension.
    parsed = urlparse(href)
    return bool(os.path.splitext(parsed.path)[1])
88 changes: 88 additions & 0 deletions tests/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
from datetime import datetime, timedelta
from typing import Callable

import pytest

import pystac
from pystac.collection import Collection
from pystac.layout import (
APILayoutStrategy,
AsIsLayoutStrategy,
BestPracticesLayoutStrategy,
CustomLayoutStrategy,
Expand Down Expand Up @@ -442,3 +445,88 @@ def test_item(self) -> None:
item.set_self_href("/an/href")
href = self.strategy.get_href(item, parent_dir="http://example.com")
self.assertEqual(href, "/an/href")


class APILayoutStrategyTest(unittest.TestCase):
    """Tests for the HREFs generated by :class:`APILayoutStrategy`."""

    def setUp(self) -> None:
        self.strategy = APILayoutStrategy()

    def test_produces_layout_for_root_catalog(self) -> None:
        # The root catalog is expected at the base URL itself.
        cat = pystac.Catalog(id="test", description="test desc")
        href = self.strategy.get_href(
            cat, parent_dir="http://example.com", is_root=True
        )
        self.assertEqual(href, "http://example.com")

    def test_produces_layout_for_child_catalog(self) -> None:
        cat = pystac.Catalog(id="test", description="test desc")
        href = self.strategy.get_href(cat, parent_dir="http://example.com")
        self.assertEqual(href, "http://example.com/test")

    def test_cannot_produce_layout_for_root_collection(self) -> None:
        collection = TestCases.case_8()
        with pytest.raises(ValueError):
            self.strategy.get_href(
                collection, parent_dir="http://example.com", is_root=True
            )

    def test_produces_layout_for_child_collection(self) -> None:
        collection = TestCases.case_8()
        href = self.strategy.get_href(collection, parent_dir="http://example.com")
        self.assertEqual(href, f"http://example.com/collections/{collection.id}")

    def test_produces_layout_for_item(self) -> None:
        # The item HREF is derived from the HREF generated for its collection.
        collection = TestCases.case_8()
        col_href = self.strategy.get_href(collection, parent_dir="http://example.com")
        item = next(collection.get_items(recursive=True))
        href = self.strategy.get_href(item, parent_dir=col_href)
        expected = f"http://example.com/collections/{collection.id}/items/{item.id}"
        self.assertEqual(href, expected)

    def test_produces_normalized_layout(self) -> None:
        # Build a minimal catalog -> collection -> item tree and normalize it
        # with the API layout strategy.
        cat = pystac.Catalog(id="test_catalog", description="Test Catalog")
        col = pystac.Collection(
            id="test_collection",
            description="Test Collection",
            extent=pystac.Extent(
                spatial=pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]),
                temporal=pystac.TemporalExtent(
                    [[datetime(2023, 1, 1), datetime(2023, 12, 31)]]
                ),
            ),
        )
        item = pystac.Item(
            id="test_item",
            geometry={
                "type": "Polygon",
                "coordinates": [
                    [
                        [180.0, -90.0],
                        [180.0, 90.0],
                        [-180.0, 90.0],
                        [-180.0, -90.0],
                        [180.0, -90.0],
                    ]
                ],
            },
            bbox=[-180, -90, 180, 90],
            datetime=datetime(2023, 1, 1),
            properties={},
            assets={
                "data": pystac.Asset(
                    href="http://example.com/assets/data.tif",
                    roles=["data"],
                    title="DATA",
                )
            },
        )
        cat.add_child(col)
        col.add_item(item)
        cat.normalize_hrefs("http://example.com", strategy=self.strategy)

        self.assertEqual(cat.self_href, "http://example.com")
        self.assertEqual(
            col.self_href, "http://example.com/collections/test_collection"
        )
        self.assertEqual(
            item.self_href,
            "http://example.com/collections/test_collection/items/test_item",
        )
30 changes: 30 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pystac.utils import (
JoinType,
is_absolute_href,
is_file_path,
join_path_or_url,
make_absolute_href,
make_relative_href,
Expand Down Expand Up @@ -445,3 +446,32 @@ def test_join_path_or_url() -> None:
with pytest.warns(DeprecationWarning):
joined_url = join_path_or_url(JoinType.URL, *url_args)
assert joined_url == "https://some/page/file.html"


@pytest.mark.parametrize(
    "href,expected",
    [
        # Relative paths
        ("path/to/file.txt", True),
        ("path/to/nofile", False),
        ("./path/to/file.txt", True),
        ("./path/to/nofile", False),
        ("./path/to/", False),
        # Absolute POSIX paths
        ("/path/to/file.txt", True),
        ("/path/to/nofile", False),
        ("/path", False),
        ("/", False),
        # Windows paths with forward and backward slashes
        ("D:/path/to/file.txt", True),
        ("D:/path/to/nofile", False),
        ("D:\\path\\to\\file.txt", True),
        ("D:\\path\\to\\nofile", False),
        ("D:", False),
        ("D:\\", False),
        # URLs
        ("https://example.com/absolutepath/to/file.txt", True),
        ("https://example.com/absolutepath/to/nofile", False),
        ("https://example.com", False),
        ("https://example.com/", False),
    ],
)
def test_is_file_path(href: str, expected: bool) -> None:
    """``is_file_path`` reports an HREF as a file only if it has an extension."""
    assert is_file_path(href) == expected