From b145ee6dd6f69e4730d7e46a871ce3171f1b127d Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 24 Aug 2022 13:41:12 +0200 Subject: [PATCH 01/15] Update Sinergise SentinelHub back-end url (dev) --- conf/aggregator.dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index e6268491..6bcc940d 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -61,7 +61,7 @@ # internal version of https://openeo.creo.vito.be/openeo/1.0/ "creo": "https://openeo.creo.vgt.vito.be/openeo/1.0", # Sentinel Hub OpenEO by Sinergise - "sentinelhub": "https://w0j9yieg9l.execute-api.eu-central-1.amazonaws.com/testing", + "sentinelhub": "https://lj00amcev7.execute-api.eu-central-1.amazonaws.com/testing/", }, auth_entitlement_check={"oidc_issuer_whitelist": { "https://aai.egi.eu/auth/realms/egi/", From c5b016d31844696dcd4e134ae39e0a3d48768a7c Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 24 Aug 2022 14:37:09 +0200 Subject: [PATCH 02/15] Issue #51: change legacy EGI Check-in provider id to "egi-legacy" relates to Open-EO/openeo-python-driver#131 and openEOPlatform/architecture-docs#261 --- CHANGELOG.md | 2 +- conf/aggregator.dev.py | 2 +- conf/aggregator.prod.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6fbc0d2..0ef3acb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Make sure user id (prefix) is logged in JSON logs - Updated (generous fallback) "FreeTier" user role to 30DayTrial (more strict) - Use EODC dev instance in aggregator dev config -- Update EGI issuer URL to new Keycloak one (keep old provider under "egi-old") +- Update EGI issuer URL to new Keycloak one (keep old provider under "egi-legacy") ### Fixed diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 6bcc940d..32eabf5d 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -28,7 +28,7 @@ default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( - id="egi-old", + id="egi-legacy", issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) scopes=[ "openid", "email", diff --git a/conf/aggregator.prod.py b/conf/aggregator.prod.py index c822c69c..c8a8ba70 100644 --- a/conf/aggregator.prod.py +++ b/conf/aggregator.prod.py @@ -29,7 +29,7 @@ default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( - id="egi-old", + id="egi-legacy", issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) scopes=[ "openid", "email", From cefee8f5ed880e9d5c7ee0b190abc8b393007c9b Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 24 Aug 2022 15:15:39 +0200 Subject: [PATCH 03/15] OIDC providers: fallback on configured providers when empty intersection related to #51, Open-EO/openeo-python-driver#131 and openEOPlatform/architecture-docs#261 --- src/openeo_aggregator/connection.py | 2 ++ tests/test_connection.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/openeo_aggregator/connection.py b/src/openeo_aggregator/connection.py index 75eabd82..dd2cf117 100644 --- a/src/openeo_aggregator/connection.py +++ b/src/openeo_aggregator/connection.py @@ -303,6 +303,8 @@ def get_oidc_providers(self) -> List[OidcProvider]: _log.debug(f"OIDC provider intersection: {intersection}") if len(intersection) == 0: _log.error(f"Emtpy OIDC provider intersection. Issuers per backend: {agg_pids_per_backend}") + intersection = set(p.id for p in self._configured_oidc_providers) + _log.warning(f"Emtpy OIDC provider intersection. Falling back on {intersection} from config") # Take configured providers for common issuers. agg_providers = [p for p in self._configured_oidc_providers if p.id in intersection] diff --git a/tests/test_connection.py b/tests/test_connection.py index c1c3ac2c..a1f6524f 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -286,6 +286,32 @@ def test_build_oidc_handling_intersection( {"ya": "y2", "za": "z2"}, ] + def test_build_oidc_handling_intersection_empty( + self, config, requests_mock, backend1, backend2 + ): + requests_mock.get(backend1 + "/credentials/oidc", json={"providers": [ + {"id": "x1", "issuer": "https://x.test", "title": "X1"}, + ]}) + requests_mock.get(backend2 + "/credentials/oidc", json={"providers": [ + {"id": "y2", "issuer": "https://y.test", "title": "YY2"}, + ]}) + + multi_backend_connection = MultiBackendConnection( + backends=config.aggregator_backends, + configured_oidc_providers=[ + OidcProvider("ya", "https://y.test", "A-Y"), + OidcProvider("za", "https://z.test", "A-Z"), + ]) + + assert multi_backend_connection.get_oidc_providers() == [ + OidcProvider(id="ya", issuer="https://y.test", title="A-Y", scopes=["openid"]), + OidcProvider(id="za", issuer="https://z.test", title="A-Z", scopes=["openid"]), + ] + assert [con._oidc_provider_map for con in multi_backend_connection] == [ + {}, + {"ya": "y2"}, + ] + def test_build_oidc_handling_order(self, config, requests_mock, backend1, backend2): requests_mock.get(backend1 + "/credentials/oidc", json={"providers": [ {"id": "d1", "issuer": "https://d.test", "title": "D1"}, From d64d4de8a08d4dc9be6002999a6ffaa256b63ace Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 24 Aug 2022 17:19:56 +0200 Subject: [PATCH 04/15] Remove legacy/experimental `default_client` field from oidc provider configs --- conf/aggregator.dev.py | 3 --- conf/aggregator.prod.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 32eabf5d..0dead634 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -24,7 +24,6 @@ "eduperson_scoped_affiliation", ], title="EGI Check-in", - default_client=DEFAULT_OIDC_CLIENT_EGI, # TODO: remove this legacy experimental field default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( @@ -36,7 +35,6 @@ "eduperson_scoped_affiliation", ], title="EGI Check-in (legacy)", - default_client=DEFAULT_OIDC_CLIENT_EGI, # TODO: remove this legacy experimental field default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), # OidcProvider( @@ -48,7 +46,6 @@ # "eduperson_scoped_affiliation", # ], # title="EGI Check-in (dev)", - # default_client=_DEFAULT_OIDC_CLIENT_EGI, # TODO: remove this legacy experimental field # default_clients=[_DEFAULT_OIDC_CLIENT_EGI], # ), ] diff --git a/conf/aggregator.prod.py b/conf/aggregator.prod.py index c8a8ba70..f52981a8 100644 --- a/conf/aggregator.prod.py +++ b/conf/aggregator.prod.py @@ -25,7 +25,6 @@ "eduperson_scoped_affiliation", ], title="EGI Check-in", - default_client=DEFAULT_OIDC_CLIENT_EGI, # TODO: remove this legacy experimental field default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( @@ -37,7 +36,6 @@ "eduperson_scoped_affiliation", ], title="EGI Check-in (legacy)", - default_client=DEFAULT_OIDC_CLIENT_EGI, # TODO: remove this legacy experimental field default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), ] From 5b8b60ad9e359b8004555a65e1799ef281d69bfa Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Tue, 30 Aug 2022 16:09:20 +0200 Subject: [PATCH 05/15] Issue #68 make oidc_issuer_whitelist checking more robust Ignore trailing slash --- src/openeo_aggregator/backend.py | 9 ++++-- src/openeo_aggregator/utils.py | 4 +++ tests/conftest.py | 13 +++++++-- tests/test_utils.py | 7 ++++- tests/test_views.py | 47 ++++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 6 deletions(-) diff --git a/src/openeo_aggregator/backend.py b/src/openeo_aggregator/backend.py index 4c8819bc..a8981a1f 100644 --- a/src/openeo_aggregator/backend.py +++ b/src/openeo_aggregator/backend.py @@ -20,7 +20,7 @@ from openeo_aggregator.partitionedjobs import PartitionedJob from openeo_aggregator.partitionedjobs.splitting import FlimsySplitter, TileGridSplitter from openeo_aggregator.partitionedjobs.tracking import PartitionedJobConnection, PartitionedJobTracker -from openeo_aggregator.utils import TtlCache, MultiDictGetter, subdict, dict_merge +from openeo_aggregator.utils import TtlCache, MultiDictGetter, subdict, dict_merge, normalize_issuer_url from openeo_driver.ProcessGraphDeserializer import SimpleProcessing from openeo_driver.backend import OpenEoBackendImplementation, AbstractCollectionCatalog, LoadParameters, Processing, \ OidcProvider, BatchJobs, BatchJobMetadata @@ -715,10 +715,13 @@ def merge(formats: dict, to_add: dict): def user_access_validation(self, user: User, request: flask.Request) -> User: if self._auth_entitlement_check: int_data = user.internal_auth_data - issuer_whitelist = self._auth_entitlement_check.get("oidc_issuer_whitelist", []) + issuer_whitelist = [ + normalize_issuer_url(u) + for u in self._auth_entitlement_check.get("oidc_issuer_whitelist", []) + ] if not ( int_data["authentication_method"] == "OIDC" - and int_data["oidc_issuer"].rstrip("/").lower() in issuer_whitelist + and normalize_issuer_url(int_data["oidc_issuer"]) in issuer_whitelist ): user_message = "An EGI account is required for using openEO Platform." _log.warning(f"user_access_validation failure: %r %r", user_message, { diff --git a/src/openeo_aggregator/utils.py b/src/openeo_aggregator/utils.py index d1e9d025..0ad72eb0 100644 --- a/src/openeo_aggregator/utils.py +++ b/src/openeo_aggregator/utils.py @@ -233,3 +233,7 @@ def timestamp_to_rfc3339(timestamp: float) -> str: """Convert unix epoch timestamp to RFC3339 datetime string""" dt = datetime.datetime.utcfromtimestamp(timestamp) return rfc3339.datetime(dt) + + +def normalize_issuer_url(url: str) -> str: + return url.rstrip("/").lower() diff --git a/tests/conftest.py b/tests/conftest.py index b63ed40c..aa5e1fe1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,9 +33,18 @@ def backend2(requests_mock) -> str: @pytest.fixture -def configured_oidc_providers() -> List[OidcProvider]: +def main_test_oidc_issuer() -> str: + """ + Main OIDC issuer URL. + As a fixture to make it overridable with `pytest.mark.parametrize` for certain tests. + """ + return "https://egi.test" + + +@pytest.fixture +def configured_oidc_providers(main_test_oidc_issuer: str) -> List[OidcProvider]: return [ - OidcProvider(id="egi", issuer="https://egi.test", title="EGI"), + OidcProvider(id="egi", issuer=main_test_oidc_issuer, title="EGI"), OidcProvider(id="x-agg", issuer="https://x.test", title="X (agg)"), OidcProvider(id="y-agg", issuer="https://y.test", title="Y (agg)"), OidcProvider(id="z-agg", issuer="https://z.test", title="Z (agg)"), diff --git a/tests/test_utils.py b/tests/test_utils.py index 298a15d7..7da61b1b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,7 @@ import shapely.geometry from openeo_aggregator.utils import TtlCache, CacheMissException, MultiDictGetter, subdict, dict_merge, EventHandler, \ - BoundingBox, strip_join, timestamp_to_rfc3339 + BoundingBox, strip_join, timestamp_to_rfc3339, normalize_issuer_url class FakeClock: @@ -268,3 +268,8 @@ def test_strip_join(): def test_timestamp_to_rfc3339(): assert timestamp_to_rfc3339(0) == "1970-01-01T00:00:00Z" assert timestamp_to_rfc3339(1644012109) == "2022-02-04T22:01:49Z" + + +def test_normalize_issuer_url(): + assert normalize_issuer_url("https://example.com/oidc/") == "https://example.com/oidc" + assert normalize_issuer_url("https://example.com/OidC/") == "https://example.com/oidc" diff --git a/tests/test_views.py b/tests/test_views.py index a91c9585..fd068a74 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -6,6 +6,7 @@ import pytest import requests +from openeo.rest.connection import url_join from openeo_aggregator.backend import AggregatorCollectionCatalog from openeo_aggregator.config import AggregatorConfig from openeo_aggregator.connection import MultiBackendConnection @@ -337,6 +338,52 @@ def test_oidc_enrolled( assert data["info"]["roles"] == expected_roles assert data["default_plan"] == expected_plan + @pytest.mark.parametrize(["whitelist", "main_test_oidc_issuer", "success"], [ + (["https://egi.test"], "https://egi.test", True), + (["https://egi.test"], "https://egi.test/", True), + (["https://egi.test/"], "https://egi.test", True), + (["https://egi.test/"], "https://egi.test/", True), + (["https://egi.test/oidc"], "https://egi.test/oidc/", True), + (["https://egi.test/oidc/"], "https://egi.test/oidc", True), + (["https://egi.test/foo"], "https://egi.test/bar", False), + ]) + def test_issuer_url_normalization( + self, config, requests_mock, backend1, backend2, whitelist, + main_test_oidc_issuer, success, caplog, + ): + config.auth_entitlement_check = {"oidc_issuer_whitelist": whitelist} + + requests_mock.get(backend1 + "/credentials/oidc", json={"providers": [ + {"id": "egi", "issuer": main_test_oidc_issuer, "title": "EGI"} + ]}) + requests_mock.get(backend2 + "/credentials/oidc", json={"providers": [ + {"id": "egi", "issuer": main_test_oidc_issuer, "title": "EGI"} + ]}) + oidc_url_ui = url_join(main_test_oidc_issuer, "/userinfo") + oidc_url_conf = url_join(main_test_oidc_issuer, "/.well-known/openid-configuration") + requests_mock.get(oidc_url_conf, json={"userinfo_endpoint": oidc_url_ui}) + requests_mock.get( + oidc_url_ui, + json=self._get_userifo_handler(eduperson_entitlement=[ + "urn:mace:egi.eu:group:vo.openeo.cloud:role=early_adopter#aai.egi.eu", + ]) + ) + api100 = get_api100(get_flask_app(config)) + api100.set_auth_bearer_token(token="oidc/egi/funiculifunicula") + + if success: + res = api100.get("/me").assert_status_code(200) + data = res.json + assert data["user_id"] == "john" + assert data["info"]["roles"] == ["EarlyAdopter"] + else: + res = api100.get("/me") + res.assert_error(403, "PermissionsInsufficient") + assert re.search( + "user_access_validation failure.*oidc_issuer.*https://egi.test/bar.*issuer_whitelist.*https://egi.test/foo", + caplog.text + ) + class TestProcessing: def test_processes_basic(self, api100, requests_mock, backend1, backend2): From f36e093c08565b55950caf223ecf93d8ac874cea Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Tue, 30 Aug 2022 18:51:05 +0200 Subject: [PATCH 06/15] Issue #68 Simplify OIDC provider handling: keep it static Just follow `configured_oidc_providers` from config, don't bother calculating and caching intersection of upstream back-ends. openeo-python-driver's HttpAuthHandler does not support OIDC provider changes anyway. --- src/openeo_aggregator/backend.py | 7 ++--- src/openeo_aggregator/connection.py | 39 +++++++------------------ tests/test_backend.py | 38 +++--------------------- tests/test_connection.py | 45 ++++++++--------------------- tests/test_views.py | 28 +++--------------- 5 files changed, 33 insertions(+), 124 deletions(-) diff --git a/src/openeo_aggregator/backend.py b/src/openeo_aggregator/backend.py index a8981a1f..ff962b8c 100644 --- a/src/openeo_aggregator/backend.py +++ b/src/openeo_aggregator/backend.py @@ -673,6 +673,7 @@ def __init__(self, backends: MultiBackendConnection, config: AggregatorConfig): ) self._cache = TtlCache(default_ttl=CACHE_TTL_DEFAULT, name="General") self._backends.on_connections_change.add(self._cache.flush_all) + self._configured_oidc_providers: List[OidcProvider] = config.configured_oidc_providers self._auth_entitlement_check: Union[bool, dict] = config.auth_entitlement_check # Shorter HTTP cache TTL to adapt quicker to changed back-end configurations @@ -681,11 +682,7 @@ def __init__(self, backends: MultiBackendConnection, config: AggregatorConfig): ) def oidc_providers(self) -> List[OidcProvider]: - # TODO: openeo-python-driver (HttpAuthHandler) currently does support changes in - # the set of oidc_providers ids (id mapping is statically established at startup time) - return self._cache.get_or_call( - key="oidc_providers", callback=self._backends.get_oidc_providers, log_on_miss=True, - ) + return self._configured_oidc_providers def file_formats(self) -> dict: return self._cache.get_or_call(key="file_formats", callback=self._file_formats, log_on_miss=True) diff --git a/src/openeo_aggregator/connection.py b/src/openeo_aggregator/connection.py index dd2cf117..6acd8975 100644 --- a/src/openeo_aggregator/connection.py +++ b/src/openeo_aggregator/connection.py @@ -68,6 +68,9 @@ def __init__( self.default_timeout = default_timeout + def __repr__(self): + return f"<{type(self).__name__} {self.id}: {self._root_url}>" + def _get_auth(self) -> Union[None, OpenEoApiAuthBase]: return None if self._auth_locked else self._auth @@ -92,8 +95,7 @@ def _build_oidc_provider_map(self, configured_providers: List[OidcProvider]) -> pid_map[agg_provider.id] = targets[0] return pid_map - @property - def oidc_provider_map(self) -> Dict[str, str]: + def get_oidc_provider_map(self) -> Dict[str, str]: return self._oidc_provider_map def _get_bearer(self, request: flask.Request) -> str: @@ -106,9 +108,13 @@ def _get_bearer(self, request: flask.Request) -> str: return auth.partition("Bearer ")[2] elif auth.startswith("Bearer oidc/"): _, pid, token = auth.split("/") - if pid not in self._oidc_provider_map: - _log.warning(f"OIDC provider mapping failure: {pid} not in {self._oidc_provider_map}.") - backend_pid = self._oidc_provider_map.get(pid, pid) + try: + backend_pid = self._oidc_provider_map[pid] + except KeyError: + _log.error(f"Back-end {self} lacks OIDC provider support: {pid!r} not in {self._oidc_provider_map}.") + raise OpenEOApiException( + code="OidcSupportError", message=f"Back-end {self.id!r} does not support OIDC provider {pid!r}." + ) return f"oidc/{backend_pid}/{token}" else: raise AuthenticationSchemeInvalidException @@ -289,29 +295,6 @@ def map(self, callback: Callable[[BackendConnection], Any]) -> Iterator[Tuple[st # TODO: customizable exception handling: skip, warn, re-raise? yield con.id, res - def get_oidc_providers(self) -> List[OidcProvider]: - """ - Determine OIDC providers to use in aggregator (based on OIDC issuers supported by all backends) - and set up provider id mapping in the backend connections - - :param configured_providers: OIDC providers dedicated/configured for the aggregator - :return: list of actual OIDC providers to use (configured for aggregator and supported by all backends) - """ - # Get intersection of aggregator OIDC provider ids - agg_pids_per_backend = [set(c.oidc_provider_map.keys()) for c in self.get_connections()] - intersection: Set[str] = functools.reduce((lambda x, y: x.intersection(y)), agg_pids_per_backend) - _log.debug(f"OIDC provider intersection: {intersection}") - if len(intersection) == 0: - _log.error(f"Emtpy OIDC provider intersection. Issuers per backend: {agg_pids_per_backend}") - intersection = set(p.id for p in self._configured_oidc_providers) - _log.warning(f"Emtpy OIDC provider intersection. Falling back on {intersection} from config") - - # Take configured providers for common issuers. - agg_providers = [p for p in self._configured_oidc_providers if p.id in intersection] - _log.info(f"Actual aggregator OIDC providers: {agg_providers}") - - return agg_providers - def streaming_flask_response( backend_response: requests.Response, diff --git a/tests/test_backend.py b/tests/test_backend.py index 68025217..ed6a3ad1 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -13,45 +13,15 @@ class TestAggregatorBackendImplementation: def test_oidc_providers(self, multi_backend_connection, config, backend1, backend2, requests_mock): - requests_mock.get(backend1 + "/credentials/oidc", json={"providers": [ - {"id": "x", "issuer": "https://x.test", "title": "X"}, - {"id": "y", "issuer": "https://y.test", "title": "YY"}, - ]}) - requests_mock.get(backend2 + "/credentials/oidc", json={"providers": [ - {"id": "y", "issuer": "https://y.test", "title": "YY"}, - {"id": "z", "issuer": "https://z.test", "title": "ZZZ"}, - ]}) implementation = AggregatorBackendImplementation(backends=multi_backend_connection, config=config) providers = implementation.oidc_providers() assert providers == [ - OidcProvider(id="y-agg", issuer="https://y.test", title="Y (agg)") + OidcProvider(id='egi', issuer='https://egi.test', title='EGI'), + OidcProvider(id='x-agg', issuer='https://x.test', title='X (agg)'), + OidcProvider(id='y-agg', issuer='https://y.test', title='Y (agg)'), + OidcProvider(id='z-agg', issuer='https://z.test', title='Z (agg)'), ] - def test_oidc_providers_caching(self, multi_backend_connection, config, backend1, backend2, requests_mock): - m1 = requests_mock.get(backend1 + "/credentials/oidc", json={"providers": [ - {"id": "x", "issuer": "https://x.test", "title": "X"}, - {"id": "y", "issuer": "https://y.test", "title": "YY"}, - ]}) - m2 = requests_mock.get(backend2 + "/credentials/oidc", json={"providers": [ - {"id": "y", "issuer": "https://y.test", "title": "YY"}, - {"id": "z", "issuer": "https://z.test", "title": "ZZZ"}, - ]}) - implementation = AggregatorBackendImplementation(backends=multi_backend_connection, config=config) - assert (m1.call_count, m2.call_count) == (0, 0) - providers = implementation.oidc_providers() - assert providers == [OidcProvider(id="y-agg", issuer="https://y.test", title="Y (agg)")] - assert (m1.call_count, m2.call_count) == (1, 1) - providers = implementation.oidc_providers() - assert providers == [OidcProvider(id="y-agg", issuer="https://y.test", title="Y (agg)")] - assert (m1.call_count, m2.call_count) == (1, 1) - - MultiBackendConnection._clock = itertools.count(time.time() + 1000).__next__ - implementation._cache.flush_all() - - providers = implementation.oidc_providers() - assert providers == [OidcProvider(id="y-agg", issuer="https://y.test", title="Y (agg)")] - assert (m1.call_count, m2.call_count) == (2, 2) - def test_file_formats_simple(self, multi_backend_connection, config, backend1, backend2, requests_mock): just_geotiff = { "input": {"GTiff": {"gis_data_types": ["raster"], "parameters": {}, "title": "GeoTiff"}}, diff --git a/tests/test_connection.py b/tests/test_connection.py index a1f6524f..e9134f61 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -13,7 +13,7 @@ from openeo_aggregator.connection import BackendConnection, MultiBackendConnection, LockedAuthException, \ InvalidatedConnection from openeo_driver.backend import OidcProvider -from openeo_driver.errors import AuthenticationRequiredException +from openeo_driver.errors import AuthenticationRequiredException, OpenEOApiException class TestBackendConnection: @@ -246,12 +246,9 @@ def test_build_oidc_handling_basic(self, config, pid, issuer, title): OidcProvider(id=pid, issuer=issuer, title=title), OidcProvider(id="egi-dev", issuer="https://egi-dev.test", title="EGI dev"), ]) - assert multi_backend_connection.get_oidc_providers() == [ - OidcProvider(id=pid, issuer=issuer, title=title, scopes=["openid"]), - ] for con in multi_backend_connection: - assert con._oidc_provider_map == {pid: "egi"} + assert con.get_oidc_provider_map() == {pid: "egi"} @pytest.mark.parametrize(["issuer_y1", "issuer_y2"], [ ("https://y.test", "https://y.test"), @@ -278,10 +275,7 @@ def test_build_oidc_handling_intersection( OidcProvider("za", "https://z.test", "A-Z"), ]) - assert multi_backend_connection.get_oidc_providers() == [ - OidcProvider(id="ya", issuer="https://y.test", title="A-Y", scopes=["openid"]), - ] - assert [con._oidc_provider_map for con in multi_backend_connection] == [ + assert [con.get_oidc_provider_map() for con in multi_backend_connection] == [ {"xa": "x1", "ya": "y1"}, {"ya": "y2", "za": "z2"}, ] @@ -303,11 +297,7 @@ def test_build_oidc_handling_intersection_empty( OidcProvider("za", "https://z.test", "A-Z"), ]) - assert multi_backend_connection.get_oidc_providers() == [ - OidcProvider(id="ya", issuer="https://y.test", title="A-Y", scopes=["openid"]), - OidcProvider(id="za", issuer="https://z.test", title="A-Z", scopes=["openid"]), - ] - assert [con._oidc_provider_map for con in multi_backend_connection] == [ + assert [con.get_oidc_provider_map() for con in multi_backend_connection] == [ {}, {"ya": "y2"}, ] @@ -338,11 +328,7 @@ def test_build_oidc_handling_order(self, config, requests_mock, backend1, backen OidcProvider("a-c", "https://c.test/", "A-C"), ]) - providers = multi_backend_connection.get_oidc_providers() - assert [p.issuer for p in providers] == [ - "https://b.test", "https://e.test/", "https://a.test", "https://d.test", "https://c.test/" - ] - assert [con._oidc_provider_map for con in multi_backend_connection] == [ + assert [con.get_oidc_provider_map() for con in multi_backend_connection] == [ {'a-a': 'a1', 'a-b': 'b1', 'a-c': 'c1', 'a-d': 'd1', 'a-e': 'e1'}, {'a-a': 'a2', 'a-b': 'b2', 'a-c': 'c2', 'a-d': 'd2', 'a-e': 'e2'}, ] @@ -377,11 +363,6 @@ def test_oidc_provider_mapping(self, requests_mock): OidcProvider("ay", "https://y.test", "A-Y"), ]) - assert multi_backend_connection.get_oidc_providers() == [ - OidcProvider(id="ax", issuer="https://x.test", title="A-X", scopes=["openid"]), - OidcProvider(id="ay", issuer="https://y.test", title="A-Y", scopes=["openid"]), - ] - def get_me(request: requests.Request, context): auth = request.headers.get("Authorization") return {"user_id": auth} @@ -431,9 +412,7 @@ def get_me(request: requests.Request, context): configured_oidc_providers=configured_oidc_providers ) - agg_providers = multi_backend_connection.get_oidc_providers() - assert agg_providers == [OidcProvider(id="ax", issuer="https://x.test", title="A-X", scopes=["openid"])] - warnings = "\n".join(r.getMessage() for r in caplog.records if r.levelno == logging.WARNING) + warnings = "\n".join(r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING) assert warnings == "" # Fake aggregator request containing bearer token for aggregator providers @@ -446,16 +425,16 @@ def get_me(request: requests.Request, context): requests_mock.get(domain1 + "/credentials/oidc", json={"providers": [ {"id": "y1", "issuer": "https://y.test/", "title": "Y1"}, ]}) - agg_providers = multi_backend_connection.get_oidc_providers() - assert agg_providers == [OidcProvider(id="ay", issuer="https://y.test", title="A-Y", scopes=["openid"])] # Try old auth headers request = flask.Request(environ={"HTTP_AUTHORIZATION": "Bearer oidc/ax/yadayadayada"}) - with multi_backend_connection.get_connection("b1").authenticated_from_request(request=request) as con: - assert con.get("/me").json() == {"user_id": "Bearer oidc/ax/yadayadayada"} - warnings = "\n".join(r.getMessage() for r in caplog.records if r.levelno == logging.WARNING) - assert "OIDC provider mapping failure" in warnings + with pytest.raises(OpenEOApiException, match="Back-end 'b1' does not support OIDC provider 'ax'"): + with multi_backend_connection.get_connection("b1").authenticated_from_request(request=request) as con: + pass + + errors = "\n".join(r.getMessage() for r in caplog.records if r.levelno >= logging.ERROR) + assert "lacks OIDC provider support: 'ax' not in {'ay': 'y1'}." in errors # New headers request = flask.Request(environ={"HTTP_AUTHORIZATION": "Bearer oidc/ay/yadayadayada"}) diff --git a/tests/test_views.py b/tests/test_views.py index fd068a74..15d9720c 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -185,27 +185,10 @@ class TestAuthentication: def test_credentials_oidc_default(self, api100, backend1, backend2): res = api100.get("/credentials/oidc").assert_status_code(200).json assert res == {"providers": [ - {"id": "egi", "issuer": "https://egi.test", "title": "EGI", "scopes": ["openid"]} - ]} - - def test_credentials_oidc_intersection(self, requests_mock, config, backend1, backend2): - # When mocking `/credentials/oidc` we have to do that before build flask app - # because it's requested during app building (through `HttpAuthHandler`), - # so unlike other tests we can not use fixtures that build the app/client/api automatically - requests_mock.get(backend1 + "/credentials/oidc", json={"providers": [ - {"id": "x", "issuer": "https://x.test", "title": "X"}, - {"id": "y", "issuer": "https://y.test", "title": "YY"}, - ]}) - requests_mock.get(backend2 + "/credentials/oidc", json={"providers": [ - {"id": "y", "issuer": "https://y.test", "title": "YY"}, - {"id": "z", "issuer": "https://z.test", "title": "ZZZ"}, - ]}) - # Manually creating app and api100 (which we do with fixtures elsewhere) - api100 = get_api100(get_flask_app(config)) - - res = api100.get("/credentials/oidc").assert_status_code(200).json - assert res == {"providers": [ - {"id": "y-agg", "issuer": "https://y.test", "title": "Y (agg)", "scopes": ["openid"]} + {"id": "egi", "issuer": "https://egi.test", "title": "EGI", "scopes": ["openid"]}, + {"id": "x-agg", "issuer": "https://x.test", "title": "X (agg)", "scopes": ["openid"]}, + {"id": "y-agg", "issuer": "https://y.test", "title": "Y (agg)", "scopes": ["openid"]}, + {"id": "z-agg", "issuer": "https://z.test", "title": "Z (agg)", "scopes": ["openid"]}, ]} def test_me_unauthorized(self, api100): @@ -1190,9 +1173,6 @@ def test_startup_during_backend_downtime(self, backend1, broken_backend2, reques backend2, config, b2_root = broken_backend2 api100 = get_api100(get_flask_app(config)) - assert "Failed to create backend 'b2' connection" in caplog.text - assert b2_root.call_count == 1 - api100.get("/").assert_status_code(200) resp = api100.get("/health").assert_status_code(200) From 3e2728a764bce4580e7431cb3a18bdc09b9d2914 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 31 Aug 2022 17:18:09 +0200 Subject: [PATCH 07/15] Issue #68 re-enable egi-dev provider for easier testing new Keycloak setup --- conf/aggregator.dev.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 0dead634..6c8227eb 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -37,17 +37,17 @@ title="EGI Check-in (legacy)", default_clients=[DEFAULT_OIDC_CLIENT_EGI], ), - # OidcProvider( - # id="egi-dev", - # issuer="https://aai-dev.egi.eu/oidc/", - # scopes=[ - # "openid", "email", - # "eduperson_entitlement", - # "eduperson_scoped_affiliation", - # ], - # title="EGI Check-in (dev)", - # default_clients=[_DEFAULT_OIDC_CLIENT_EGI], - # ), + OidcProvider( + id="egi-dev", + issuer="https://aai-dev.egi.eu/oidc/", + scopes=[ + "openid", "email", + "eduperson_entitlement", + "eduperson_scoped_affiliation", + ], + title="EGI Check-in (dev)", + default_clients=[DEFAULT_OIDC_CLIENT_EGI], + ), ] config = AggregatorConfig( From 377e3c8c49414ae3e2ea9112f253ae1c4979c082 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 31 Aug 2022 17:23:59 +0200 Subject: [PATCH 08/15] better EGI meteadata reuse in aggregator configs --- conf/aggregator.dev.py | 40 ++++++++++++++++++---------------------- conf/aggregator.prod.py | 29 ++++++++++++++--------------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 6c8227eb..d670b6c4 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -1,7 +1,7 @@ from openeo_aggregator.config import AggregatorConfig from openeo_driver.users.oidc import OidcProvider -DEFAULT_OIDC_CLIENT_EGI = { +_DEFAULT_OIDC_CLIENT_EGI = { "id": "openeo-platform-default-client", "grant_types": [ "authorization_code+pkce", @@ -14,39 +14,35 @@ "https://editor.openeo.org", ] } + +_DEFAULT_EGI_SCOPES = [ + "openid", + "email", + "eduperson_entitlement", + "eduperson_scoped_affiliation", +] + configured_oidc_providers = [ OidcProvider( id="egi", - issuer="https://aai.egi.eu/auth/realms/egi/", - scopes=[ - "openid", "email", - "eduperson_entitlement", - "eduperson_scoped_affiliation", - ], title="EGI Check-in", - default_clients=[DEFAULT_OIDC_CLIENT_EGI], + issuer="https://aai.egi.eu/auth/realms/egi/", + scopes=_DEFAULT_EGI_SCOPES, + default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( id="egi-legacy", - issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) - scopes=[ - "openid", "email", - "eduperson_entitlement", - "eduperson_scoped_affiliation", - ], title="EGI Check-in (legacy)", - default_clients=[DEFAULT_OIDC_CLIENT_EGI], + issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) + scopes=_DEFAULT_EGI_SCOPES, + default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( id="egi-dev", - issuer="https://aai-dev.egi.eu/oidc/", - scopes=[ - "openid", "email", - "eduperson_entitlement", - "eduperson_scoped_affiliation", - ], title="EGI Check-in (dev)", - default_clients=[DEFAULT_OIDC_CLIENT_EGI], + issuer="https://aai-dev.egi.eu/oidc/", + scopes=_DEFAULT_EGI_SCOPES, + default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), ] diff --git a/conf/aggregator.prod.py b/conf/aggregator.prod.py index f52981a8..895eef3c 100644 --- a/conf/aggregator.prod.py +++ b/conf/aggregator.prod.py @@ -1,7 +1,7 @@ from openeo_aggregator.config import AggregatorConfig from openeo_driver.users.oidc import OidcProvider -DEFAULT_OIDC_CLIENT_EGI = { +_DEFAULT_OIDC_CLIENT_EGI = { "id": "openeo-platform-default-client", "grant_types": [ "authorization_code+pkce", @@ -15,28 +15,27 @@ ] } +_DEFAULT_EGI_SCOPES = [ + "openid", + "email", + "eduperson_entitlement", + "eduperson_scoped_affiliation", +] + configured_oidc_providers = [ OidcProvider( id="egi", - issuer="https://aai.egi.eu/auth/realms/egi/", - scopes=[ - "openid", "email", - "eduperson_entitlement", - "eduperson_scoped_affiliation", - ], title="EGI Check-in", - default_clients=[DEFAULT_OIDC_CLIENT_EGI], + issuer="https://aai.egi.eu/auth/realms/egi/", + scopes=_DEFAULT_EGI_SCOPES, + default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( id="egi-legacy", - issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) - scopes=[ - "openid", "email", - "eduperson_entitlement", - "eduperson_scoped_affiliation", - ], title="EGI Check-in (legacy)", - default_clients=[DEFAULT_OIDC_CLIENT_EGI], + issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) + scopes=_DEFAULT_EGI_SCOPES, + default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), ] From bee3349c61e8fc9b6e520dbbc2e70ca7f1dd62f3 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 31 Aug 2022 17:33:42 +0200 Subject: [PATCH 09/15] fixup! Issue #68 re-enable egi-dev provider for easier testing new Keycloak setup --- conf/aggregator.dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index d670b6c4..9bf19870 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -40,7 +40,7 @@ OidcProvider( id="egi-dev", title="EGI Check-in (dev)", - issuer="https://aai-dev.egi.eu/oidc/", + issuer="https://aai-dev.egi.eu/auth/realms/egi/", scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), From af3802f596ac7345d4a5b5d4f02c4bc741e545e1 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 1 Sep 2022 09:38:25 +0200 Subject: [PATCH 10/15] Update SentinelHub backend in dev config --- conf/aggregator.dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 9bf19870..027b19ac 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -54,7 +54,7 @@ # internal version of https://openeo.creo.vito.be/openeo/1.0/ "creo": "https://openeo.creo.vgt.vito.be/openeo/1.0", # Sentinel Hub OpenEO by Sinergise - "sentinelhub": "https://lj00amcev7.execute-api.eu-central-1.amazonaws.com/testing/", + "sentinelhub": "https://openeo.sentinel-hub.com/production/", }, auth_entitlement_check={"oidc_issuer_whitelist": { "https://aai.egi.eu/auth/realms/egi/", From 1bd931cb6a501ca7a1beef94cb5dd15af32aa73d Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Mon, 5 Sep 2022 09:20:17 +0200 Subject: [PATCH 11/15] Issue #68 keep egi-legacy as default provider for now --- conf/aggregator.dev.py | 12 ++++++------ conf/aggregator.prod.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 027b19ac..22d75e24 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -24,16 +24,16 @@ configured_oidc_providers = [ OidcProvider( - id="egi", - title="EGI Check-in", - issuer="https://aai.egi.eu/auth/realms/egi/", + id="egi-legacy", + title="EGI Check-in (legacy)", + issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( - id="egi-legacy", - title="EGI Check-in (legacy)", - issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) + id="egi", + title="EGI Check-in", + issuer="https://aai.egi.eu/auth/realms/egi/", scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), diff --git a/conf/aggregator.prod.py b/conf/aggregator.prod.py index 895eef3c..81bf3045 100644 --- a/conf/aggregator.prod.py +++ b/conf/aggregator.prod.py @@ -24,16 +24,16 @@ configured_oidc_providers = [ OidcProvider( - id="egi", - title="EGI Check-in", - issuer="https://aai.egi.eu/auth/realms/egi/", + id="egi-legacy", + title="EGI Check-in (legacy)", + issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( - id="egi-legacy", - title="EGI Check-in (legacy)", - issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) + id="egi", + title="EGI Check-in", + issuer="https://aai.egi.eu/auth/realms/egi/", scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), From 2500468cea378e0e613523274cbfb1fd30cd777b Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 14 Sep 2022 17:00:21 +0200 Subject: [PATCH 12/15] Issue #68 make new (keycloak) based EGI provider default again --- conf/aggregator.dev.py | 12 ++++++------ conf/aggregator.prod.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 22d75e24..027b19ac 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -24,16 +24,16 @@ configured_oidc_providers = [ OidcProvider( - id="egi-legacy", - title="EGI Check-in (legacy)", - issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) + id="egi", + title="EGI Check-in", + issuer="https://aai.egi.eu/auth/realms/egi/", scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( - id="egi", - title="EGI Check-in", - issuer="https://aai.egi.eu/auth/realms/egi/", + id="egi-legacy", + title="EGI Check-in (legacy)", + issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), diff --git a/conf/aggregator.prod.py b/conf/aggregator.prod.py index 81bf3045..895eef3c 100644 --- a/conf/aggregator.prod.py +++ b/conf/aggregator.prod.py @@ -24,16 +24,16 @@ configured_oidc_providers = [ OidcProvider( - id="egi-legacy", - title="EGI Check-in (legacy)", - issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) + id="egi", + title="EGI Check-in", + issuer="https://aai.egi.eu/auth/realms/egi/", scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), OidcProvider( - id="egi", - title="EGI Check-in", - issuer="https://aai.egi.eu/auth/realms/egi/", + id="egi-legacy", + title="EGI Check-in (legacy)", + issuer="https://aai.egi.eu/oidc/", # TODO: remove old EGI provider refs (issuer https://aai.egi.eu/oidc/) scopes=_DEFAULT_EGI_SCOPES, default_clients=[_DEFAULT_OIDC_CLIENT_EGI], ), From 27cc98a1accc7ec189023e8bc46fa31150af4eda Mon Sep 17 00:00:00 2001 From: Jeroen Verstraelen Date: Tue, 13 Sep 2022 12:47:32 +0200 Subject: [PATCH 13/15] Issue #70 split job id for load_ml_model parameters --- src/openeo_aggregator/backend.py | 17 +++++++++++++ tests/test_views.py | 41 ++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/openeo_aggregator/backend.py b/src/openeo_aggregator/backend.py index ff962b8c..1c1cecba 100644 --- a/src/openeo_aggregator/backend.py +++ b/src/openeo_aggregator/backend.py @@ -370,6 +370,14 @@ def get_backend_for_process_graph(self, process_graph: dict, api_version: str) - aggregator_job_id=arguments["id"] ) backend_candidates = [b for b in backend_candidates if b == job_backend_id] + elif process_id == "load_ml_model": + if not arguments["id"].startswith("http"): + # Extract backend id that can load this ML model. + _, job_backend_id = JobIdMapping.parse_aggregator_job_id( + backends=self.backends, + aggregator_job_id=arguments["id"] + ) + backend_candidates = [b for b in backend_candidates if b == job_backend_id] except Exception as e: _log.error(f"Failed to parse process graph: {e!r}", exc_info=True) raise ProcessGraphInvalidException() @@ -435,6 +443,15 @@ def preprocess(node: Any) -> Any: assert job_backend_id == backend_id, f"{job_backend_id} != {backend_id}" # Create new load_result node dict with updated job id return dict_merge(node, arguments=dict_merge(arguments, id=job_id)) + if process_id == "load_ml_model" and "id" in arguments: + if not arguments["id"].startswith("http"): + job_id, job_backend_id = JobIdMapping.parse_aggregator_job_id( + backends=self.backends, + aggregator_job_id=arguments["id"] + ) + assert job_backend_id == backend_id, f"{job_backend_id} != {backend_id}" + # Create new load_ml_model node dict with updated job id + return dict_merge(node, arguments=dict_merge(arguments, id=job_id)) return {k: preprocess(v) for k, v in node.items()} elif isinstance(node, list): return [preprocess(x) for x in node] diff --git a/tests/test_views.py b/tests/test_views.py index 15d9720c..5fa7ec3b 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -670,10 +670,43 @@ def post_result(request: requests.Request, context): pg = { "lr": {"process_id": "load_result", "arguments": {"id": job_id}}, "lc": {"process_id": "load_collection", "arguments": {"id": "S2"}}, - "merge": {"process_id": "merge_cubes", "arguments": { - "cube1": {"from_node": "lr"}, - "cube2": {"from_node": "lc"} - }} + } + api100.set_auth_bearer_token(token=TEST_USER_BEARER_TOKEN) + request = {"process": {"process_graph": pg}} + if expected_success: + api100.post("/result", json=request).assert_status_code(200) + assert (b1_mock.call_count, b2_mock.call_count) == {1: (1, 0), 2: (0, 1)}[s2_backend] + else: + api100.post("/result", json=request).assert_error(400, "BackendLookupFailure") + assert (b1_mock.call_count, b2_mock.call_count) == (0, 0) + + @pytest.mark.parametrize(["job_id", "s2_backend", "expected_success"], [ + ("b1-b6tch-j08", 1, True), + ("b2-b6tch-j08", 1, False), + ("b1-b6tch-j08", 2, False), + ("b2-b6tch-j08", 2, True), + ("https://example.com/ml_model_metadata.json", 1, True), # In this case it picks the first backend. + ("https://example.com/ml_model_metadata.json", 2, True), + ]) + def test_load_result_job_id_parsing_with_load_ml_model( + self, api100, requests_mock, backend1, backend2, job_id, s2_backend, expected_success + ): + """Issue #70: random forest: providing training job with aggregator job id fails""" + + backend_root = {1: backend1, 2: backend2}[s2_backend] + requests_mock.get(backend_root + "/collections", json={"collections": [{"id": "S2"}]}) + + def post_result(request: requests.Request, context): + pg = request.json()["process"]["process_graph"] + assert pg["lmm"]["arguments"]["id"] in ["b6tch-j08", "https://example.com/ml_model_metadata.json"] + context.headers["Content-Type"] = "application/json" + + b1_mock = requests_mock.post(backend1 + "/result", json=post_result) + b2_mock = requests_mock.post(backend2 + "/result", json=post_result) + + pg = { + "lmm": {"process_id": "load_ml_model", "arguments": {"id": job_id}}, + "lc": {"process_id": "load_collection", "arguments": {"id": "S2"}}, } api100.set_auth_bearer_token(token=TEST_USER_BEARER_TOKEN) request = {"process": {"process_graph": pg}} From 0c8541cf6b090ee5bb03b4c9a3a0ccef4e00f347 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 15 Sep 2022 10:22:32 +0200 Subject: [PATCH 14/15] Issue #70 finetune load_ml_model handling a bit more (PR #71) --- CHANGELOG.md | 2 ++ src/openeo_aggregator/backend.py | 39 +++++++++++++++++++------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ef3acb8..d8254d23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Properly rewrite model id in `load_ml_model` ([#70](https://github.com/Open-EO/openeo-aggregator/issues/70)) + ## [0.4.x] ### Added diff --git a/src/openeo_aggregator/backend.py b/src/openeo_aggregator/backend.py index 1c1cecba..63a5d773 100644 --- a/src/openeo_aggregator/backend.py +++ b/src/openeo_aggregator/backend.py @@ -371,13 +371,9 @@ def get_backend_for_process_graph(self, process_graph: dict, api_version: str) - ) backend_candidates = [b for b in backend_candidates if b == job_backend_id] elif process_id == "load_ml_model": - if not arguments["id"].startswith("http"): - # Extract backend id that can load this ML model. - _, job_backend_id = JobIdMapping.parse_aggregator_job_id( - backends=self.backends, - aggregator_job_id=arguments["id"] - ) - backend_candidates = [b for b in backend_candidates if b == job_backend_id] + model_backend_id = self._process_load_ml_model(arguments)[0] + if model_backend_id: + backend_candidates = [b for b in backend_candidates if b == model_backend_id] except Exception as e: _log.error(f"Failed to parse process graph: {e!r}", exc_info=True) raise ProcessGraphInvalidException() @@ -443,15 +439,10 @@ def preprocess(node: Any) -> Any: assert job_backend_id == backend_id, f"{job_backend_id} != {backend_id}" # Create new load_result node dict with updated job id return dict_merge(node, arguments=dict_merge(arguments, id=job_id)) - if process_id == "load_ml_model" and "id" in arguments: - if not arguments["id"].startswith("http"): - job_id, job_backend_id = JobIdMapping.parse_aggregator_job_id( - backends=self.backends, - aggregator_job_id=arguments["id"] - ) - assert job_backend_id == backend_id, f"{job_backend_id} != {backend_id}" - # Create new load_ml_model node dict with updated job id - return dict_merge(node, arguments=dict_merge(arguments, id=job_id)) + if process_id == "load_ml_model": + model_id = self._process_load_ml_model(arguments, expected_backend=backend_id)[1] + if model_id: + return dict_merge(node, arguments=dict_merge(arguments, id=model_id)) return {k: preprocess(v) for k, v in node.items()} elif isinstance(node, list): return [preprocess(x) for x in node] @@ -459,6 +450,22 @@ def preprocess(node: Any) -> Any: return preprocess(process_graph) + def _process_load_ml_model( + self, arguments: dict, expected_backend: Optional[str] = None + ) -> Tuple[Union[str, None], str]: + """Handle load_ml_model: detect/strip backend_id from model_id if it is a job_id""" + model_id = arguments.get("id") + if model_id and not model_id.startswith("http"): + # TODO: load_ml_model's `id` could also be file path (see https://github.com/Open-EO/openeo-processes/issues/384) + job_id, job_backend_id = JobIdMapping.parse_aggregator_job_id( + backends=self.backends, + aggregator_job_id=model_id + ) + if expected_backend and job_backend_id != expected_backend: + raise BackendLookupFailureException(f"{job_backend_id} != {expected_backend}") + return job_backend_id, job_id + return None, model_id + class AggregatorBatchJobs(BatchJobs): From f334bcb4f8a125c3944e33e2f9527dd9b71ca003 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 15 Sep 2022 11:24:04 +0200 Subject: [PATCH 15/15] Update creodias back-end URL in dev config --- conf/aggregator.dev.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index 027b19ac..b8dc050a 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -52,7 +52,7 @@ "vito": "https://openeo-dev.vito.be/openeo/1.0/", "eodc": "https://openeo-dev.eodc.eu/v1.0/", # internal version of https://openeo.creo.vito.be/openeo/1.0/ - "creo": "https://openeo.creo.vgt.vito.be/openeo/1.0", + "creo": "https://openeo-dev.creo.vito.be/openeo/1.0", # Sentinel Hub OpenEO by Sinergise "sentinelhub": "https://openeo.sentinel-hub.com/production/", },