From c1f7b0512520c1a61edab33660090bfe188ca39f Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Thu, 25 Aug 2022 12:04:44 +0200 Subject: [PATCH] Issue #5 reformat _merge_collection_metadata --- src/openeo_aggregator/backend.py | 70 +++++++------------------------- tests/test_backend.py | 16 ++++---- 2 files changed, 22 insertions(+), 64 deletions(-) diff --git a/src/openeo_aggregator/backend.py b/src/openeo_aggregator/backend.py index d42ea29b..7e0ad257 100644 --- a/src/openeo_aggregator/backend.py +++ b/src/openeo_aggregator/backend.py @@ -149,41 +149,28 @@ def _merge_collection_metadata(self, by_backend: Dict[str, dict]) -> dict: result = { "id": cid, + "stac_version": max(list(getter.get("stac_version")) + ["0.9.0"]), + "title": getter.first("title", default=cid), + "description": getter.first("description", default=cid), + "type": getter.first("type", default="Collection"), + "links": [l for l in list(getter.merge_arrays("links")) if l.get("rel") not in ("self","parent","root")], + "summaries": getter.select("summaries").simple_merge() } - # stac_version - result["stac_version"] = max(list(getter.get("stac_version")) + ["0.9.0"]) - # stac_extensions - stac_extensions = sorted(getter.merge_arrays("stac_extensions", skip_duplicates=True)) - if stac_extensions: - result["stac_extensions"] = stac_extensions - - result["title"] = getter.first("title", default=cid) - result["description"] = getter.first("description", default=cid) - - # keywords - keywords = getter.merge_arrays("keywords", skip_duplicates=True) - if keywords: - result["keywords"] = keywords - # version + # Note: CRS is required by OGC API: https://docs.opengeospatial.org/is/18-058/18-058.html#_crs_identifier_list + result.update(getter.simple_merge([ + "stac_extensions", "keywords", "deprecated", "providers", "assets", + "crs", + "sci:citation", "sci:doi", "sci:publications" + ])) + + # All keys with special merge handling. versions = set(getter.get("version")) if versions: # TODO: smarter version maximum? Low priority, versions key is not used in most backends. result["version"] = max(versions) - # deprecated - deprecateds = list(getter.get("deprecated")) - if deprecateds: - result["deprecated"] = all(deprecateds) - - result["type"] = getter.first("type", default="Collection") - # Assume the license links are available. licenses = set(getter.get("license")) result["license"] = licenses.pop() if len(licenses) == 1 else ("various" if licenses else "proprietary") - # providers - providers = getter.merge_arrays("providers", skip_duplicates=True) - if providers: - result["providers"] = list(providers) - # extent result["extent"] = { "spatial": { "bbox": getter.select("extent").select("spatial").merge_arrays("bbox", skip_duplicates=True) \ @@ -194,10 +181,7 @@ def _merge_collection_metadata(self, by_backend: Dict[str, dict]) -> dict: or [[None, None]], }, } - # links - result["links"] = [l for l in list(getter.merge_arrays("links")) if l.get("rel") not in ("self","parent","root")] - # cube_dimensions cube_dimensions = getter.first("cube:dimensions") if cube_dimensions: cube_dimension_bands = list(getter.select("cube:dimensions").select("bands").merge_arrays('values', skip_duplicates=True)) @@ -235,35 +219,11 @@ def _merge_collection_metadata(self, by_backend: Dict[str, dict]) -> dict: result["cube:dimensions"][dim] = cube_dim_value result["cube:dimensions"][dim]["extent"] = extent - # summaries - result["summaries"] = getter.select("summaries").simple_merge() # TODO: use a more robust/user friendly backend pointer than backend id (which is internal implementation detail) result["summaries"][self.STAC_PROPERTY_PROVIDER_BACKEND] = list(by_backend.keys()) - # assets - result["assets"] = list(getter.merge_arrays("assets")) - - # crs - # Required by OGC API - Features: https://docs.opengeospatial.org/is/18-058/18-058.html#_crs_identifier_list - crs_list = getter.merge_arrays("crs", skip_duplicates=True) - if crs_list: - result["crs"] = list(crs_list) - - # Scientific extension. - # sci:citation - citation_list = getter.first("sci:citation", default=None) - if citation_list: - result["sci:citation"] = citation_list - # sci:doi - doi_list = getter.first("sci:doi", default=None) - if doi_list: - result["sci:doi"] = doi_list - # sci:publications - publications_list = getter.merge_arrays("sci:publications", skip_duplicates=True) - if publications_list: - result["sci:publications"] = list(publications_list) # Log warning for collections without license links. - license_links = [l for l in list(getter.merge_arrays("links")) if l.get("rel")=="license"] + license_links = [l for l in list(getter.merge_arrays("links")) if l.get("rel") == "license"] if result["license"] in ["various", "proprietary"] and not license_links: _log.warning(f"Missing license links for collection: {cid}") return result diff --git a/tests/test_backend.py b/tests/test_backend.py index e6c090f9..e22fe351 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -152,7 +152,7 @@ def test_get_all_metadata_common_collections_minimal( {'href': 'http://oeoa.test/openeo/1.1.0/collections/S3', 'rel': 'self'}] }, { - "id": "S4", "description": "S4", "title": "S4", + "id": "S4", "description": "S4", "title": "S4", "type": "Collection", "stac_version": "0.9.0", "extent": {"spatial": {"bbox": [[-180, -90, 180, 90]]}, "temporal": {"interval": [[None, None]]}}, "license": "proprietary", @@ -162,7 +162,6 @@ def test_get_all_metadata_common_collections_minimal( {'href': 'http://oeoa.test/openeo/1.1.0/collections', 'rel': 'parent'}, {'href': 'http://oeoa.test/openeo/1.1.0/collections/S4', 'rel': 'self'} ], - "assets": [], "description": "S4", "type": "Collection" }, { "id": "S5", 'links': [ @@ -244,7 +243,7 @@ def test_get_all_metadata_common_collections_merging( {"href": "http://oeoa.test/openeo/1.1.0/collections", "rel": "parent"}, {"href": "http://oeoa.test/openeo/1.1.0/collections/S4", "rel": "self"}, ], - "assets": [], "type": "Collection" + "type": "Collection" }, ] @@ -341,12 +340,12 @@ def test_get_collection_metadata_merging(self, multi_backend_connection, backend "stac_version": "1.0.0", "stac_extensions": [ "datacube", + "sar", "https://stac-extensions.github.io/datacube/v1.0.0/schema.json", - "https://stac-extensions.github.io/raster/v1.0.0/schema.json", - "https://stac-extensions.github.io/sar/v1.0.0/schema.json", - "https://stac-extensions.github.io/sat/v1.0.0/schema.json", "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "sar" + "https://stac-extensions.github.io/sat/v1.0.0/schema.json", + "https://stac-extensions.github.io/sar/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.0.0/schema.json" ], "crs": ["http://www.opengis.net/def/crs/OGC/1.3/CRS84", "http://www.opengis.net/def/crs/EPSG/0/2154"], "keywords": ["S2", "Sentinel Hub", "xcube", "SAR"], @@ -363,7 +362,6 @@ def test_get_collection_metadata_merging(self, multi_backend_connection, backend {'href': 'http://oeoa.test/openeo/1.1.0/collections/S2', 'rel': 'self'} ], "providers": [{"name": "provider1"}, {"name": "provider2"}], - "assets": [], "type": "Collection", "sci:citation": "Modified Copernicus Sentinel data [Year]/Sentinel Hub", } @@ -423,7 +421,7 @@ def test_get_collection_metadata_merging_summaries( 'sar:looks_azimuth': [1], 'sar:looks_equivalent_number': [4.4], 'sar:looks_range': [5], 'sar:pixel_spacing_azimuth': [10], 'sar:pixel_spacing_range': [10], 'sar:resolution_azimuth': [22], "sar:resolution_range": [20] - }, 'assets': [] + } } def test_get_collection_metadata_merging_extent(self, multi_backend_connection, backend1, backend2, requests_mock, flask_app):