From 4b73e73ba5065b3cfa17046b8df746ffae2fb0de Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 13:30:15 -0700
Subject: [PATCH 01/35] logs router first commit

---
 pangeo_forge_orchestrator/api.py          |  2 ++
 pangeo_forge_orchestrator/routers/logs.py | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 pangeo_forge_orchestrator/routers/logs.py

diff --git a/pangeo_forge_orchestrator/api.py b/pangeo_forge_orchestrator/api.py
index 9c29461c..6852f69f 100644
--- a/pangeo_forge_orchestrator/api.py
+++ b/pangeo_forge_orchestrator/api.py
@@ -8,6 +8,7 @@
 from .http import http_session
 from .metadata import app_metadata
 from .routers.github_app import github_app_router
+from .routers.logs import logs_router
 from .routers.model_router import router as model_router
 from .routers.repr import repr_router
 from .routers.stats import stats_router
@@ -41,6 +42,7 @@ def on_shutdown():
     http_session.stop()
 
 
+app.include_router(logs_router)
 app.include_router(model_router)
 app.include_router(stats_router)
 app.include_router(github_app_router)
diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py
new file mode 100644
index 00000000..85f3b791
--- /dev/null
+++ b/pangeo_forge_orchestrator/routers/logs.py
@@ -0,0 +1,18 @@
+from fastapi import APIRouter, Depends, Query
+from sqlmodel import Session, SQLModel, select
+
+from ..dependencies import get_session as get_database_session
+
+logs_router = APIRouter()
+
+
+@logs_router.get(
+    "/recipe_runs/{id}/logs",
+    summary=" ",
+    tags=["", "", "admin"],
+)
+async def get_recipe_run_logs(
+    id: int,
+    db_session: Session = Depends(get_database_session),
+):
+    ...

From e057902fef5b51b71352e3cfeff4e09526edffe8 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 16:04:08 -0700
Subject: [PATCH 02/35] logs router WIP

---
 pangeo_forge_orchestrator/routers/logs.py | 60 ++++++++++++++++++++---
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py
index 85f3b791..42bf43e9 100644
--- a/pangeo_forge_orchestrator/routers/logs.py
+++ b/pangeo_forge_orchestrator/routers/logs.py
@@ -1,18 +1,66 @@
+import subprocess
+
 from fastapi import APIRouter, Depends, Query
-from sqlmodel import Session, SQLModel, select
+from fastapi.responses import PlainTextResponse
+from sqlmodel import Session, select
 
-from ..dependencies import get_session as get_database_session
+from ..dependencies import check_authentication_header, get_session as get_database_session
 
 logs_router = APIRouter()
 
 
+def get_logs(
+    job_id="2022-09-29_11_31_40-14379398480910960453",  # set from recipe_run
+    severity="ERROR",  # set from query
+    limit=1,  # set from query
+):
+    query = (
+        'resource.type="dataflow_step" '
+        f'AND resource.labels.job_id="{job_id}" '
+        "AND logName=("
+        '"projects/pangeo-forge-4967/logs/dataflow.googleapis.com%2Fjob-message" '
+        'OR "projects/pangeo-forge-4967/logs/dataflow.googleapis.com%2Flauncher") '
+        f'AND severity="{severity}"'
+    )
+    cmd = "gcloud logging read".split() + [query]
+
+    if limit:
+        cmd += [f"--limit={limit}"]
+
+    print(cmd)
+    logs = subprocess.check_output(cmd)
+
+    return logs
+
+
 @logs_router.get(
     "/recipe_runs/{id}/logs",
-    summary=" ",
-    tags=["", "", "admin"],
+    summary="Get job logs for a recipe_run, specified by database id.",
+    tags=["recipe_run", "logs", "admin"],
+    response_class=PlainTextResponse,
+    dependencies=[Depends(check_authentication_header)],
 )
-async def get_recipe_run_logs(
+async def logs_from_recipe_run_id(
     id: int,
     db_session: Session = Depends(get_database_session),
 ):
-    ...
+    logs = get_logs()
+    return logs
+
+
+@logs_router.get(
+    "/feedstocks/{org}/{repo}/{commit}/{recipe_id}/logs",
+    summary="Get job logs for a recipe run, specified by feedstock_spec, commit, and recipe_id.",
+    tags=["feedstock", "logs", "admin"],
+    response_class=PlainTextResponse,
+    dependencies=[Depends(check_authentication_header)],
+)
+async def logs_from_feedstock_spec_commit_and_recipe_id(
+    org: str,
+    repo: str,
+    commit: str,
+    recipe_id: str,
+    db_session: Session = Depends(get_database_session),
+):
+    logs = get_logs()
+    return logs
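For orientation, the subprocess call assembled by `get_logs` above is equivalent to running roughly the following from a shell (the job id shown is just the hardcoded placeholder default from this commit, `pangeo-forge-4967` is the GCP project baked into the query, and an authenticated `gcloud` CLI is assumed):

```console
$ gcloud logging read \
    'resource.type="dataflow_step" AND resource.labels.job_id="2022-09-29_11_31_40-14379398480910960453" AND logName=("projects/pangeo-forge-4967/logs/dataflow.googleapis.com%2Fjob-message" OR "projects/pangeo-forge-4967/logs/dataflow.googleapis.com%2Flauncher") AND severity="ERROR"' \
    --limit=1
```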
+ ) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail) + + return job_id + def get_logs( job_id="2022-09-29_11_31_40-14379398480910960453", # set from recipe_run severity="ERROR", # set from query limit=1, # set from query ): + log_name_prefix = "projects/pangeo-forge-4967/logs/dataflow.googleapis.com" query = ( 'resource.type="dataflow_step" ' f'AND resource.labels.job_id="{job_id}" ' - "AND logName=(" - '"projects/pangeo-forge-4967/logs/dataflow.googleapis.com%2Fjob-message" ' - 'OR "projects/pangeo-forge-4967/logs/dataflow.googleapis.com%2Flauncher") ' + f'AND logName=("{log_name_prefix}%2Fjob-message" OR "{log_name_prefix}%2Flauncher") ' f'AND severity="{severity}"' ) cmd = "gcloud logging read".split() + [query] @@ -27,9 +52,7 @@ def get_logs( if limit: cmd += [f"--limit={limit}"] - print(cmd) logs = subprocess.check_output(cmd) - return logs @@ -42,25 +65,45 @@ def get_logs( ) async def logs_from_recipe_run_id( id: int, + *, db_session: Session = Depends(get_database_session), + severity: str = DEFAULT_SEVERITY, + limit: int = DEFAULT_LIMIT, ): - logs = get_logs() + recipe_run = db_session.exec( + select(MODELS["recipe_run"].table).where(MODELS["recipe_run"].table.id == id) + ).one() + job_id = job_id_from_recipe_run(recipe_run) + logs = get_logs(job_id, severity, limit) return logs @logs_router.get( - "/feedstocks/{org}/{repo}/{commit}/{recipe_id}/logs", + "/feedstocks/{feedstock_spec:path}/{commit}/{recipe_id}/logs", summary="Get job logs for a recipe run, specified by feedstock_spec, commit, and recipe_id.", tags=["feedstock", "logs", "admin"], response_class=PlainTextResponse, dependencies=[Depends(check_authentication_header)], ) async def logs_from_feedstock_spec_commit_and_recipe_id( - org: str, - repo: str, + feedstock_spec: str, commit: str, recipe_id: str, + *, db_session: Session = Depends(get_database_session), + severity: str = DEFAULT_SEVERITY, + limit: int = DEFAULT_LIMIT, ): - logs = get_logs() + feedstock = db_session.exec( + select(MODELS["feedstock"].table).where(MODELS["feedstock"].table.spec == feedstock_spec) + ).one() + statement = ( + select(MODELS["recipe_run"].table) + .where(MODELS["recipe_run"].table.recipe_id == recipe_id) + .where(MODELS["recipe_run"].table.head_sha == commit) + .where(MODELS["recipe_run"].table.feedstock_id == feedstock.id) + ) + recipe_run = db_session.exec(statement).one() + job_id = job_id_from_recipe_run(recipe_run) + logs = get_logs(job_id, severity, limit) return logs From 057760f49128853e0830d1b6d206eff6e8ccb6b3 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 17:40:11 -0700 Subject: [PATCH 04/35] test logs router first commit --- tests/routers/test_logs.py | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/routers/test_logs.py diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py new file mode 100644 index 00000000..4c66d4a0 --- /dev/null +++ b/tests/routers/test_logs.py @@ -0,0 +1,41 @@ +import json + +import pytest +from fastapi import HTTPException + +from pangeo_forge_orchestrator.models import MODELS +from pangeo_forge_orchestrator.routers.logs import job_id_from_recipe_run + + +@pytest.mark.parametrize( + "message, expected_error", + [ + ('{"job_id": "123"}', None), + ('{"job_name": "123"}', KeyError), + ('"job_id": "123"', json.JSONDecodeError), + ], +) +def test_job_id_from_recipe_run(message, expected_error): + recipe_run_kws = { + "recipe_id": "liveocean", + 
"bakery_id": 1, + "feedstock_id": 1, + "head_sha": "35d889f7c89e9f0d72353a0649ed1cd8da04826b", + "version": "", + "started_at": "2022-09-19T16:31:43", + "completed_at": None, + "conclusion": None, + "status": "in_progress", + "is_test": True, + "dataset_type": "zarr", + "dataset_public_url": None, + "message": message, + "id": 1, + } + recipe_run = MODELS["recipe_run"].table(**recipe_run_kws) + if not expected_error: + job_id = job_id_from_recipe_run(recipe_run) + assert job_id == json.loads(message)["job_id"] + else: + with pytest.raises(HTTPException): + job_id_from_recipe_run(recipe_run) From 550bd8609e1b33d6b68cd83febbdb39811d8271d Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 17:56:59 -0700 Subject: [PATCH 05/35] test get_logs helper --- tests/routers/test_logs.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index 4c66d4a0..f26d76ae 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -1,10 +1,12 @@ import json +import subprocess +from typing import List import pytest from fastapi import HTTPException from pangeo_forge_orchestrator.models import MODELS -from pangeo_forge_orchestrator.routers.logs import job_id_from_recipe_run +from pangeo_forge_orchestrator.routers.logs import get_logs, job_id_from_recipe_run @pytest.mark.parametrize( @@ -39,3 +41,21 @@ def test_job_id_from_recipe_run(message, expected_error): else: with pytest.raises(HTTPException): job_id_from_recipe_run(recipe_run) + + +@pytest.mark.parametrize( + "gcloud_logging_response", + ["Some logs returned by gcloud logging API"], +) +def test_get_logs(mocker, gcloud_logging_response): + def mock_gcloud_logging_call(cmd: List[str]): + return gcloud_logging_response + + mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) + + logs = get_logs( + job_id="2022-09-29_11_31_40-14379398480910960453", + severity="ERROR", + limit=1, + ) + assert logs == gcloud_logging_response From 79204aee4474d551c23c27fd28d86f985eafe494 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 18:23:05 -0700 Subject: [PATCH 06/35] test get logs route --- tests/routers/test_logs.py | 133 +++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index f26d76ae..0ace9db7 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -3,11 +3,14 @@ from typing import List import pytest +import pytest_asyncio from fastapi import HTTPException from pangeo_forge_orchestrator.models import MODELS from pangeo_forge_orchestrator.routers.logs import get_logs, job_id_from_recipe_run +from ..conftest import clear_database + @pytest.mark.parametrize( "message, expected_error", @@ -59,3 +62,133 @@ def mock_gcloud_logging_call(cmd: List[str]): limit=1, ) assert logs == gcloud_logging_response + + +@pytest_asyncio.fixture +async def get_logs_fixture( + api_key, + async_app_client, + request, +): + admin_headers = {"X-API-Key": api_key} + bakery_create_response = await async_app_client.post( + "/bakeries/", + json={ + "region": "us-central1", + "name": "pangeo-ldeo-nsf-earthcube", + "description": "A great bakery to test with!", + }, + headers=admin_headers, + ) + assert bakery_create_response.status_code == 200 + feedstock_create_response = await async_app_client.post( + "/feedstocks/", + json={"spec": 
From 550bd8609e1b33d6b68cd83febbdb39811d8271d Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 17:56:59 -0700
Subject: [PATCH 05/35] test get_logs helper

---
 tests/routers/test_logs.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py
index 4c66d4a0..f26d76ae 100644
--- a/tests/routers/test_logs.py
+++ b/tests/routers/test_logs.py
@@ -1,10 +1,12 @@
 import json
+import subprocess
+from typing import List
 
 import pytest
 from fastapi import HTTPException
 
 from pangeo_forge_orchestrator.models import MODELS
-from pangeo_forge_orchestrator.routers.logs import job_id_from_recipe_run
+from pangeo_forge_orchestrator.routers.logs import get_logs, job_id_from_recipe_run
 
 
 @pytest.mark.parametrize(
@@ -39,3 +41,21 @@ def test_job_id_from_recipe_run(message, expected_error):
     else:
         with pytest.raises(HTTPException):
             job_id_from_recipe_run(recipe_run)
+
+
+@pytest.mark.parametrize(
+    "gcloud_logging_response",
+    ["Some logs returned by gcloud logging API"],
+)
+def test_get_logs(mocker, gcloud_logging_response):
+    def mock_gcloud_logging_call(cmd: List[str]):
+        return gcloud_logging_response
+
+    mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call)
+
+    logs = get_logs(
+        job_id="2022-09-29_11_31_40-14379398480910960453",
+        severity="ERROR",
+        limit=1,
+    )
+    assert logs == gcloud_logging_response

From 79204aee4474d551c23c27fd28d86f985eafe494 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 18:23:05 -0700
Subject: [PATCH 06/35] test get logs route

---
 tests/routers/test_logs.py | 133 +++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py
index f26d76ae..0ace9db7 100644
--- a/tests/routers/test_logs.py
+++ b/tests/routers/test_logs.py
@@ -3,11 +3,14 @@
 from typing import List
 
 import pytest
+import pytest_asyncio
 from fastapi import HTTPException
 
 from pangeo_forge_orchestrator.models import MODELS
 from pangeo_forge_orchestrator.routers.logs import get_logs, job_id_from_recipe_run
 
+from ..conftest import clear_database
+
 
 @pytest.mark.parametrize(
     "message, expected_error",
@@ -59,3 +62,133 @@ def mock_gcloud_logging_call(cmd: List[str]):
         limit=1,
     )
     assert logs == gcloud_logging_response
+
+
+@pytest_asyncio.fixture
+async def get_logs_fixture(
+    api_key,
+    async_app_client,
+    request,
+):
+    admin_headers = {"X-API-Key": api_key}
+    bakery_create_response = await async_app_client.post(
+        "/bakeries/",
+        json={
+            "region": "us-central1",
+            "name": "pangeo-ldeo-nsf-earthcube",
+            "description": "A great bakery to test with!",
+        },
+        headers=admin_headers,
+    )
+    assert bakery_create_response.status_code == 200
+    feedstock_create_response = await async_app_client.post(
+        "/feedstocks/",
+        json={"spec": request.param["feedstock_spec"]},
+        headers=admin_headers,
+    )
+    assert feedstock_create_response.status_code == 200
+    recipe_run_kws = {
+        "recipe_id": request.param["recipe_id"],
+        "bakery_id": 1,
+        "feedstock_id": 1,
+        "head_sha": request.param["commit"],
+        "version": "",
+        "started_at": "2022-09-19T16:31:43",
+        "completed_at": None,
+        "conclusion": None,
+        "status": "in_progress",
+        "is_test": True,
+        "dataset_type": "zarr",
+        "dataset_public_url": None,
+        "message": request.param["message"],
+        "id": 1,
+    }
+    recipe_run_response = await async_app_client.post(
+        "/recipe_runs/",
+        json=recipe_run_kws,
+        headers=admin_headers,
+    )
+    assert recipe_run_response.status_code == 200
+    yield (
+        admin_headers,
+        request.param["gcloud_logging_response"],
+        request.param["feedstock_spec"],
+        request.param["commit"],
+        request.param["recipe_id"],
+    )
+    # database teardown
+    clear_database()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "get_logs_fixture",
+    [
+        dict(
+            message='{"job_id": "abc"}',
+            feedstock_spec="pangeo-forge/staged-recipes",
+            commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b",
+            recipe_id="liveocean",
+            gcloud_logging_response="Some logging message from gcloud API.",
+        ),
+    ],
+    indirect=True,
+)
+async def test_get_logs_via_recipe_run_id(
+    mocker,
+    get_logs_fixture,
+    async_app_client,
+):
+    admin_headers, gcloud_logging_response, *_ = get_logs_fixture
+
+    def mock_gcloud_logging_call(cmd: List[str]):
+        return gcloud_logging_response
+
+    mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call)
+
+    response = await async_app_client.get(
+        "/recipe_runs/1/logs",
+        headers=admin_headers,
+    )
+    assert response.status_code == 200
+    assert response.text == gcloud_logging_response
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "get_logs_fixture",
+    [
+        dict(
+            message='{"job_id": "abc"}',
+            feedstock_spec="pangeo-forge/staged-recipes",
+            commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b",
+            recipe_id="liveocean",
+            gcloud_logging_response="Some logging message from gcloud API.",
+        ),
+    ],
+    indirect=True,
+)
+async def test_get_logs_human_readable_method(
+    mocker,
+    get_logs_fixture,
+    async_app_client,
+):
+    (
+        admin_headers,
+        gcloud_logging_response,
+        feedstock_spec,
+        commit,
+        recipe_id,
+    ) = get_logs_fixture
+
+    def mock_gcloud_logging_call(cmd: List[str]):
+        return gcloud_logging_response
+
+    mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call)
+
+    response = await async_app_client.get(
+        f"/feedstocks/{feedstock_spec}/{commit}/{recipe_id}/logs",
+        headers=admin_headers,
+    )
+    assert response.status_code == 200
+    assert response.text == gcloud_logging_response
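A note on the fixture machinery above: `indirect=True` routes each parametrized dict into `get_logs_fixture` as `request.param`, so a single fixture can seed the database differently per test case. Assuming the repo's usual test tooling (pytest with the pytest-asyncio and pytest-mock plugins, per the imports above), just this module can be run with:

```console
$ pytest tests/routers/test_logs.py -v
```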
"projects/pangeo-forge-4967/logs/dataflow.googleapis.com" query = ( From aed5d975a346d65e71e14ff0710626d968b55628 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:00:24 -0700 Subject: [PATCH 08/35] on run, save job_name & job_id in recipe_run message --- .../routers/github_app.py | 9 +++++ tests/github_app/mock_pangeo_forge_runner.py | 2 -- tests/github_app/test_helpers_run.py | 36 ++++++++++++++++--- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py index 57d185e3..47afc946 100644 --- a/pangeo_forge_orchestrator/routers/github_app.py +++ b/pangeo_forge_orchestrator/routers/github_app.py @@ -620,6 +620,15 @@ async def run( try: out = subprocess.check_output(cmd) logger.debug(f"Command output is {out.decode('utf-8')}") + out = subprocess.check_output(cmd) + for line in out.splitlines(): + p = json.loads(line) + if p["status"] == "submitted": + recipe_run.message = json.dumps( + dict(job_name=p["job_name"], job_id=p["job_id"]) + ) + db_session.add(recipe_run) + db_session.commit() except subprocess.CalledProcessError as e: for line in e.output.splitlines(): p = json.loads(line) diff --git a/tests/github_app/mock_pangeo_forge_runner.py b/tests/github_app/mock_pangeo_forge_runner.py index 51bf6b5d..18aaa066 100644 --- a/tests/github_app/mock_pangeo_forge_runner.py +++ b/tests/github_app/mock_pangeo_forge_runner.py @@ -43,8 +43,6 @@ def mock_subprocess_check_output(cmd: List[str]): '{"message": "HEAD is now at 0fd9b13 Update foo.txt\\n", "status": "fetching"}\n' '{"message": "Expansion complete", "status": "completed", "meta": {"title": "Global Precipitation Climatology Project", "description": "Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty satellite/gauge precipitation Climate data Record (CDR) from 1996 to present.\\n", "pangeo_forge_version": "0.9.0", "pangeo_notebook_version": "2022.06.02", "recipes": [{"id": "gpcp", "object": "recipe:recipe"}], "provenance": {"providers": [{"name": "NOAA NCEI", "description": "National Oceanographic & Atmospheric Administration National Centers for Environmental Information", "roles": ["host", "licensor"], "url": "https://www.ncei.noaa.gov/products/global-precipitation-climatology-project"}, {"name": "University of Maryland", "description": "University of Maryland College Park Earth System Science Interdisciplinary Center (ESSIC) and Cooperative Institute for Climate and Satellites (CICS).\\n", "roles": ["producer"], "url": "http://gpcp.umd.edu/"}], "license": "No constraints on data access or use."}, "maintainers": [{"name": "Ryan Abernathey", "orcid": "0000-0001-5999-4917", "github": "rabernat"}], "bakery": {"id": "pangeo-ldeo-nsf-earthcube"}}}\n' ) - elif cmd[1] == "bake": - return '{"message": ""}'.encode("utf-8") else: raise NotImplementedError(f"Command {cmd} not implemented in tests.") else: diff --git a/tests/github_app/test_helpers_run.py b/tests/github_app/test_helpers_run.py index 22fb2890..9858cc34 100644 --- a/tests/github_app/test_helpers_run.py +++ b/tests/github_app/test_helpers_run.py @@ -2,7 +2,9 @@ This module tests the `run` function, which is a special case. 
""" +import json import subprocess +from typing import List import pytest import pytest_asyncio @@ -15,10 +17,7 @@ from ..conftest import clear_database from .fixtures import _MockGitHubBackend, get_mock_github_session -from .mock_pangeo_forge_runner import ( - mock_subprocess_check_output, - mock_subprocess_check_output_raises_called_process_error, -) +from .mock_pangeo_forge_runner import mock_subprocess_check_output_raises_called_process_error @pytest_asyncio.fixture( @@ -107,11 +106,38 @@ async def run_fixture( @pytest.mark.asyncio -async def test_run(mocker, run_fixture): +@pytest.mark.parametrize( + "job_name, job_id", + [ + ( + "a6170692e70616e67656f2d666f7267652e6f7267251162", + "2022-09-28_13_30_25-9666562633575851936", + ) + ], +) +async def test_run(mocker, run_fixture, job_name, job_id, async_app_client): run_kws = run_fixture + + def mock_subprocess_check_output(cmd: List[str]): + loglines = [ + { + "message": f"Submitted job {job_id} for recipe casm-soil-moisture", + "recipe": "casm-soil-moisture", + "job_name": job_name, + "job_id": job_id, + "status": "submitted", + }, + ] + return "\n".join([json.dumps(line) for line in loglines]).encode("utf-8") + mocker.patch.object(subprocess, "check_output", mock_subprocess_check_output) await run(**run_kws) + recipe_run = await async_app_client.get("recipe_runs/1") + recipe_run_message = recipe_run.json()["message"] + assert json.loads(recipe_run_message)["job_name"] == job_name + assert json.loads(recipe_run_message)["job_id"] == job_id + @pytest.mark.xfail(reason="https://github.com/pangeo-forge/pangeo-forge-orchestrator/issues/132") @pytest.mark.asyncio From b7244ac5ab7badda48a49d11569cc0788ed34830 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:12:23 -0700 Subject: [PATCH 09/35] fix issue comment test --- tests/github_app/mock_pangeo_forge_runner.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/github_app/mock_pangeo_forge_runner.py b/tests/github_app/mock_pangeo_forge_runner.py index 18aaa066..3db94e29 100644 --- a/tests/github_app/mock_pangeo_forge_runner.py +++ b/tests/github_app/mock_pangeo_forge_runner.py @@ -43,6 +43,16 @@ def mock_subprocess_check_output(cmd: List[str]): '{"message": "HEAD is now at 0fd9b13 Update foo.txt\\n", "status": "fetching"}\n' '{"message": "Expansion complete", "status": "completed", "meta": {"title": "Global Precipitation Climatology Project", "description": "Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty satellite/gauge precipitation Climate data Record (CDR) from 1996 to present.\\n", "pangeo_forge_version": "0.9.0", "pangeo_notebook_version": "2022.06.02", "recipes": [{"id": "gpcp", "object": "recipe:recipe"}], "provenance": {"providers": [{"name": "NOAA NCEI", "description": "National Oceanographic & Atmospheric Administration National Centers for Environmental Information", "roles": ["host", "licensor"], "url": "https://www.ncei.noaa.gov/products/global-precipitation-climatology-project"}, {"name": "University of Maryland", "description": "University of Maryland College Park Earth System Science Interdisciplinary Center (ESSIC) and Cooperative Institute for Climate and Satellites (CICS).\\n", "roles": ["producer"], "url": "http://gpcp.umd.edu/"}], "license": "No constraints on data access or use."}, "maintainers": [{"name": "Ryan Abernathey", "orcid": "0000-0001-5999-4917", "github": "rabernat"}], "bakery": {"id": "pangeo-ldeo-nsf-earthcube"}}}\n' ) + elif cmd[1] 
== "bake": + loglines = [ + { + "message": "Submitted job ABC for recipe run EFG", + "job_id": "ABC", + "job_name": "a132456", + "status": "submitted", + }, + ] + return "\n".join([json.dumps(line) for line in loglines]).encode("utf-8") else: raise NotImplementedError(f"Command {cmd} not implemented in tests.") else: From 4c854058da6ea7bf393ff137ec7b07edce6c253b Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:17:02 -0700 Subject: [PATCH 10/35] preserve existing message, if there is one --- pangeo_forge_orchestrator/routers/github_app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py index 47afc946..06a8bb1d 100644 --- a/pangeo_forge_orchestrator/routers/github_app.py +++ b/pangeo_forge_orchestrator/routers/github_app.py @@ -625,7 +625,8 @@ async def run( p = json.loads(line) if p["status"] == "submitted": recipe_run.message = json.dumps( - dict(job_name=p["job_name"], job_id=p["job_id"]) + json.loads(recipe_run.message) + | dict(job_name=p["job_name"], job_id=p["job_id"]) ) db_session.add(recipe_run) db_session.commit() From 9198d9b24bc96459730caef6cea18b4447bc9b40 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:20:57 -0700 Subject: [PATCH 11/35] fix existing message syntax --- pangeo_forge_orchestrator/routers/github_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py index 06a8bb1d..7f42abd2 100644 --- a/pangeo_forge_orchestrator/routers/github_app.py +++ b/pangeo_forge_orchestrator/routers/github_app.py @@ -624,9 +624,9 @@ async def run( for line in out.splitlines(): p = json.loads(line) if p["status"] == "submitted": + existing_message = json.loads(recipe_run.message) if recipe_run.message else {} recipe_run.message = json.dumps( - json.loads(recipe_run.message) - | dict(job_name=p["job_name"], job_id=p["job_id"]) + existing_message | dict(job_name=p["job_name"], job_id=p["job_id"]) ) db_session.add(recipe_run) db_session.commit() From ef92a15ad9363bdf3a2336f320124e649737fbef Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Thu, 29 Sep 2022 20:19:56 -0700 Subject: [PATCH 12/35] add query job logs doc --- docs/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/README.md b/docs/README.md index eee8d7ab..ea6fec7a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -32,6 +32,8 @@ external to the above-listed teams to contribute code to this repository. compute & storage backends. - [Bakeries: job status monitoring](#bakeries-job-status-monitoring) - This is how the FastAPI app knows when compute jobs have concluded. Terraform for this infra is run on each Heroku deploy. +- [Bakeries: querying job logs](#bakeries-querying-job-logs) - Query bakery job logs for recipe runs. + Especially useful for diagnosing failed jobs. - [Security](#security) - Some notes on security. - [Testing](#testing) - Automated testing in both local envs and containerized contexts. - [GitHub App: manual API calls](#github-app-manual-api-calls) - How to manually call the GitHub API _as a GitHub App_. 
From 82b1dadd651b64798d40a6dc22a557fce5dc632d Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 20:19:56 -0700
Subject: [PATCH 12/35] add query job logs doc

---
 docs/README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/README.md b/docs/README.md
index eee8d7ab..ea6fec7a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -32,6 +32,8 @@ external to the above-listed teams to contribute code to this repository.
   compute & storage backends.
 - [Bakeries: job status monitoring](#bakeries-job-status-monitoring) - This is how the FastAPI app knows
   when compute jobs have concluded. Terraform for this infra is run on each Heroku deploy.
+- [Bakeries: querying job logs](#bakeries-querying-job-logs) - Query bakery job logs for recipe runs.
+  Especially useful for diagnosing failed jobs.
 - [Security](#security) - Some notes on security.
 - [Testing](#testing) - Automated testing in both local envs and containerized contexts.
 - [GitHub App: manual API calls](#github-app-manual-api-calls) - How to manually call the GitHub API _as a GitHub App_.
@@ -639,6 +641,18 @@ This project layout is based directly on https://blog.gruntwork.io/how-to-manage
 
 See also [Resource locations](#resource-locations) for a table referencing these terraform envs.
 
+# Bakeries: querying job logs
+
+To query logs for recipe runs, see the API docs at the `/docs#/logs` route of the deployment
+(https://api.pangeo-forge.org/docs#/logs for the production deployment).
+These routes are protected, due to the risk of leaking secrets in the logs. The
+`PANGEO_FORGE_API_KEY` is available to admins in the deployment's secrets config:
+
+```console
+$ sops -d -i secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
+# cat secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
+```
+
 # Security
 
 ## Database API
From a484fab88f765e66811d85943d4ff57ec0fb27b3 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 20:47:53 -0700
Subject: [PATCH 13/35] update secrets + bakeries config for pr 150

---
 ...geo-ldeo-nsf-earthcube.pforge-pr-150.yaml} |  0
 secrets/config.pforge-pr-149.yaml             | 26 -------------------
 secrets/config.pforge-pr-150.yaml             | 26 +++++++++++++++++++
 3 files changed, 26 insertions(+), 26 deletions(-)
 rename bakeries/{pangeo-ldeo-nsf-earthcube.pforge-pr-149.yaml => pangeo-ldeo-nsf-earthcube.pforge-pr-150.yaml} (100%)
 delete mode 100644 secrets/config.pforge-pr-149.yaml
 create mode 100644 secrets/config.pforge-pr-150.yaml

diff --git a/bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-149.yaml b/bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-150.yaml
similarity index 100%
rename from bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-149.yaml
rename to bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-150.yaml
diff --git a/secrets/config.pforge-pr-149.yaml b/secrets/config.pforge-pr-149.yaml
deleted file mode 100644
index 62cc5afd..00000000
--- a/secrets/config.pforge-pr-149.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-fastapi:
-    PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:7Fl9MprZdsP0wv/dgfAXNX8sr2yk9hi8ZgcZ+j+uB98=,iv:Gg0n9udubSlTRvoAhQWk04QlCXOhK/G10KOHI+90G0Q=,tag:jGXmomUXbfS2kJhQJtMv0Q==,type:str]
-github_app:
-    app_name: ENC[AES256_GCM,data:ueMUc3y011nxB5QA4g==,iv:3NBte4vuH2v3PNMD3pDXG7wkpQnwNYqmqS3IFTrtIVw=,tag:Ii4lwFrw5VQMb3jv5sKi4g==,type:str]
-    id: ENC[AES256_GCM,data:9JhA2xeW,iv:dNKwSPh3wENi9+77UsvcZQdXdDuHNaR/ayQbUs8uSW0=,tag:QldaPE7E/r0eaOzV2qlspQ==,type:int]
-    private_key: ENC[AES256_GCM,data:LUA+Gp/leh/7X9ITbzy4grryiXn4pfyIS4dLL9iFrPSG6hekOslQlaKObKGToE+tQ+3ipSxrpd+j814EU1h2weL5R43OUO3/jv+CvOBoi7WB1sMeIDopBZ2rYiplVSUKWSbFlG6kSOay/r1eS1aOrvAo7ZQnwv4jV5Y6LQA/6X6rD+pUFTGnVDnjO3tzqv/62ONwezQH5BgtkWPej8hai2zGT3JCbo1Nk9d+BiZaSDlypGZ1S2FwNGMrYNhZiU8IQYG4DM/UQMYRil4WOiIoCIGtD24O0Z0d9gVXVG0YiNoa9EzABBI1oJU4y549pNVP2AWZxnCJEd6bAq6QTHOgVoRj/6bWzyzssNB2U1YkV8T8i2juYHYNo3dOKl/E0JLvSrEgmOcuRBb+H+AjNRb/4HBCLoMGRaA3k8DBvpkxtaPLqmHlY1xR2r1uvagc2lGdNgnOBkekOTw91CyRJBt+z+Wcb7xv+3WhWBMpm2ks8X6OV7eMYZhFwk9iEF3Ze79ZGJVUbTp8lt0rBgxqsfCy9WC5TUKQlubHIau2JOVDt9PZmD2PtThBZG5CZa8SUNLYaTFMMmnj5iW704NVW/zslC8iBrCWXlWKSZ7n7Eu0ZPlogtuf95fvltWHMlwnSM0vaixtOX7wbaDjirlRGk7t86VFyZ35YJutuUuvucjgQf7gRNe5qlJTXLj++nplY8wOs0FYf02ZXBBAAucZb++p9WVDbWzxS8PRz38CZTRGEZGs4czkuS9EHBrtGrDcTz7bVrSbdxx6wKXDeXebuXNmurMwa8yrT/MtGL6dGCokLQWULb1NhcOGJi5vGPLDkBBXL8HwoCaAerqlioRkx+szTv0zOlkWSZ/+61uRgAGZ/4rmRvLROvRav7xGfrJeOvOUc9onHJmh3V9rbiaV1EKlbRQnRHhqhOX25t1oOOCy2fyGkDgzcLDFMQ7cD5ktuujIoB0hO1fqr9QRYAbajZFtSvPDebchxZpL9Hw+EyguNX9+K2qOaFCokdnOQX+TqrQGrBk5kDuwxuadr5Nf6YDUIAfn3LRlJkQY9L01fTYrGSlTkkqqxrXndBrHnXTpYGSg1vJQ0qjgwuK9Uy/dNgsJM+81XzYI+H8C6LophuyQQIDE1Ppu6pw7DmYIkir0IRV7evqz2cyjOvj4ZhqYXhQReauNYURqbZCFKDDNbfeBPBSdLnBdYF9xBQqrkhc4A1iBb4r6DI0M9cfHdUoA9/f9/J2v2N5htCd1NfCy0luEdJLWZTMMCyK7mX1uQcLpGit2HK9Xedzs/779+t9Z5yyKVm60pps6Vj9+qz5QE1clwELjfxNWs9nAM6V/tOyUofKf1g4wcXieqZTcAw7kbHPO04doLD7z4g+sTXfjEueahoNyzdl7VDbelCezFJZjl5zN2Z0R0+RDmWvFQcBS5mUeeivAiwkyOw7tqsgWPNl3wGxmJOeMZ7hqpgMT0BnjVpE8vLb+qNE0JzR/MMKdBpS0JuBzG5YjKETXwWFOR9ojvgvwUgkj8ixB16/WEsTW2agLO7i0Vx8NKz85/UixNmhNfvOkqgV7fF07S38mxJaaYHyDYb9telXiWuYX85bESx5iH+ajD8nHqnqvdcRw8Eng3+jMtsaQbA620D9oIec+K6HOzEueSiZzLgVwR9+1RKviwu6UFOyIFcFfyL6mi023gfoQ5NrqG1kye1KwV4Oh1VquLSPf4DI5gOl06zZeG9klGvE6bW/m5WAIVGVc2vX3ulStRRJfBC84Qamj0UTYvo4EbAzMdbrUQqe45XEEvIdH1b19n0Dw46x2ipVIB+0QN3uETM3/P1kLDmumhmNRfhbFdMqT+f1pMadR7RZKegbiKmXNCLeqz/sMMNoBSFUZL3G2WKVdDyWRkIRnsLYxQnhkx9swGYzHfMLGYIvgW4BmSurJENGhtvnKb96qFBwu8yquqjXcQItomhwCugc8MywPuLcQ4c5YiIIWqYjs/Rk0Fc0yY1Rei70B6Z7pwVtA77UBdZFafWgrn4oEAGCx40A7p3Z/nhkKyIP9Rh/DTw50QhqfW6lZAU9LEPFbV+eIa43jkVwL/CWG3eU+d2WWBoGGUD6IR9jGEiMWc9NcXGDbFiVeNvRRYR4K/eML9UM3qvKQpZFDH+wQlJ2lRA6RSIGuo0cReg1GWrMf486AQ9pRSnlITeuh2AlX4f8DQPUDop/AWbddpmZTcUARHpjExrGQWW1PUTJCcIgKdb00/N8=,iv:CqyowrRrRNG1cLTOAuGp8mJ5ID44lNxKVcgb/Bhhkwc=,tag:YzChKnCMY4MCuW0g0hhqsA==,type:str]
-    webhook_secret: ENC[AES256_GCM,data:OIOaG9VXJhSx5R0lkF3QSBz49NwUkVnQT1AAl/LOVtPMUvv4ROfsqw==,iv:WCCGQLIrBJSReB7I4jOc8xZXYBoKoe9fwKB+bJlnjI8=,tag:nmoCfbChHp1ccY88LB+GOQ==,type:str]
-sops:
-    kms:
-    -   arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027
-        created_at: "2022-09-30T00:01:45Z"
-        enc: AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwFCzIdgct5+uZZ0cvbViR7qAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMes/HVEmqw9cUh/X5AgEQgDsguVE9ZK6Rlj+ZnPN7V58oIFO98p2UgHFJT9aopDRoQIGmGrTPNQBE1u0V10b4RerRHj5H1dCuB+DGdg==
-        aws_profile: ""
-    -   arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e
-        created_at: "2022-09-30T00:01:45Z"
-        enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgG00mJl+iZYTkmvpiFfx55VAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQM1ogvUw81byHA+tV0AgEQgDslOUsjnOfGmftUAS/vZ5zFu24vYXJOauEWFU83ujVAZbWvaGAzKZkI+tW8b98VS7NC5kylFcJED85n1g==
-        aws_profile: ""
-    gcp_kms: []
-    azure_kv: []
-    hc_vault: []
-    age: []
-    lastmodified: "2022-09-30T00:01:46Z"
-    mac: ENC[AES256_GCM,data:bpBkZzCFwWJTFOHo9/eNjHMWFxTDjPmKsk2i5Yx+4uWLuHdopUFTYAbKuFSxCYQQPMglV0x1p6ASXwNGQVoEoTzSs8OBrx0dWPDsYrzo30+CduRfvvU8sBRi0Sk3TltI8DkgE8CFEpQYqosVgPhkFlt1qNmxu3h1QJMLzBt7yys=,iv:h11zskx9VfWaYnq/r5C0QGwgej+jY9dA+RVus+q/ofg=,tag:3weaTS5/69fHOo26lnmlhg==,type:str]
-    pgp: []
-    unencrypted_suffix: _unencrypted
-    version: 3.7.3
diff --git a/secrets/config.pforge-pr-150.yaml b/secrets/config.pforge-pr-150.yaml
new file mode 100644
index 00000000..c0bfd3cf
--- /dev/null
+++ b/secrets/config.pforge-pr-150.yaml
@@ -0,0 +1,26 @@
+fastapi:
+    PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:ZdjyPYHZR8r6Kl6NiuISeG25jnC0VQybft5aIZffJn0=,iv:5XFvhDxGuO11KHYCZw0niPbDuwDXAyYez9m8LszLB9s=,tag:BI1JCvOe7iZ5kxiihA0fBw==,type:str]
+github_app:
+    app_name: ENC[AES256_GCM,data:1YoO1BhTGsgBTLtXPQ==,iv:fli0ZIq+w81ki7kVocNtOmBRz0j9X5EkqFNMO1XMx9g=,tag:9J+mI5z1TYElQ6qAIYfJuA==,type:str]
+    id: ENC[AES256_GCM,data:AHfnXieL,iv:MKEe6+n/2PZ7JXq69wHhukMS3KhynQR7jiveMkz0NoQ=,tag:SWDUstmTIdXhKbtjOk3/9w==,type:int]
+    private_key: ENC[AES256_GCM,data:7wSiBPM4KiC7TBuQXW3OL1q7n2q0TRZoZ5enyRVh8mu1bT3zXM1Vlg6TzsghrOCognYaI73Hb+toMKX91M6Q2GuOP29fKtbZQuddPrm6TLoTQf7LWmvsOGjD4NEEOVwF83flbwMZ3p7qTjlKpSJ1O2637lKhd5FrzX0rHYKSF6dJ24zpz7Z+FUjHrACEkKn+MOX0ni8LdvEu5jMt6J0KiaTswwSXbsIUH6OJl47Gyr/OTtLGARTTiMf+HCMmH8Q5Bo62GzV7IEfBK9k9XbtYlmM6nUbasbSJ/TtiZ2w5gDnJ6AcSB37FXuRDUIzQeQSUZXhSEVnIvSi5fvcui/oLva6yzVUw/+zfk2vx5hocBevHKLCMHulJNhaSCpac0QOYMakMNH2VL4KJXHCrlkfS65TPZnNZEMdWrbjkEDypGP0wv9gs5S5H2PsUfxtyZdvTEKflfwtr70NlhQ2u3QvH0Oy1qMf4/v1+w/K5DGeCIvsJTFcf68yDomEfOE1XqvHmAbyW3GkTpVM47rWsUIcJLw/VloQyjeS/vL8+/jQkhza/Tsx5thYPXabFW7Ud/Yg7jL7O3ytUIZVQ0UqC3HbteFlWWceSVPa/jIYfv2rdlAyDn6mWN/IRdzXESlQqR2oxFSF6Ndysf580dpzpaKUNuj4wfzkvPBpXAW0zlokxPsPAnqii166uRCYKBTOcpzQC2mGcgRzIy689XacMxKjkeezZ1klL63ewsTlOXTt/v0fDZUXGfqDaSIbXgUIlHnqCjiUFwXbvptKGHVmNVFCtHFNuliiafnc5TCFdXIanQE76uM8keBc7D6h7sEn/NyW1AA18QU1W5vzCqC/4qrYlg1qubBF7zGThxjFWYyWH2mplP+Kz785pUAiCqEyu4daKXJlIwbM0ZD7zMG84LZtt7Iwjk5YB5xPCe00n4vmh0e3tJ1mGm/Stoc94Borse5BugzG0jCP26pgRfGP41yOiBPwah6ObEryM8LeDgG0wIs2MNtnwWTxQQ5vOF7Ij44yrZdTEIDXmWfbKaXrXVZJUe7Uu8FwE+rE+RIo6IMffZISMTSsmQnEdcCF7QRyzTp5xgDvXlVEK96GB2nmBmZfcmltzhdgAR6trJoX9JQf9+Ke1Sp72BKdf9BMYVcd959eq7yCBipwar4wHbs08++vM7HLawc+3VjG+DgsZMKtT6vw9yXq9f8PoKKC1WvqfMwK/j7xAMdulHb0Um3xDJ4iPAKXvjncQQg+eYXrS2Kc0srjuMzu1oLYhHrGTsILIC4xiKuw8FODp1RCA61FINj2b5adBMTVGSLLQ1G9oWNEVJz1fu++UneuJ74LOEy2ec34Twu4S7+JFNtGidH/nWm09lzsiUaSfqmc82LUj53U4OuNk17iGHIm6cnMLn7AfcwbCbDjCX9HKuSKzAfXnHb3ZmxZ9X/9+/DWbvcQy8FVP6N7rzIJ5AwgMOMsJHkQ4kIBjT5B3n2DTbAMt24qaxE4BuQ0RhnklWbtVglAmN3Gu4CibiEUHvV7mtSEMvZp86Et9ZLr/T2uqwndPimFm11qplY5q18vd7CEoUN/O/WG4cJTPtEoKixr/hj68hyIpng26bc+DMcbXYGWGK+gALRIkvKNIjs4U0cSMP4cn62O67n1qde8Oi/eXeymeSmv6y7Ew3vOMPIM2tLMZjylIi7A3dYjniLl9ZHlECs9iX5qyp4nPW8H4UV7kzJ1upCM+gnmvAVdF7Ltzl40ByulZ/Kkcnj9X1FJ+i9UyvLAHJz7QAJlHStuqCJLzYV6FTgm5Nw2j2Joi9T6hKZEg6198v4iMc9HRLIN8pUHCFpYw3c6MjuDZ/RgPhQEI3/Q8dYMsjIF7QIJ6rlUj+fsMl985rmhrwnC6h62E1Qspew48O3JRg7JJZh+BYtDVvfzFTt5h4rCvX+Hr3yc2fXImAUXDsSdQND4NdIce1Aa4Eqvjzpzkal0SOGVauBo9Zmud749qIHPnLmJVFpEdDST1OrOkWnrKXhLxE1ynQgumDd5Kt/z6UGiXWOPYilNDJAayJm+9Iq5DBbfeo1cQgfAENCEmL4XMNGTGJ4OJG1MVhqCdnm0llVQsGUXybEyp+3NHA9ZvfsuJHS+ZT8oArNONyfiPQlMGqdNQdQMwKgTjIrOLR/VB1f9J4SOoytcnXGGUH8Wweq4B9XQFB3WUFw5mIO+8/ky6tdP9OupbfZ9wr7kecQkG5+mYMjhC0+7Xop0CLUgl5qg=,iv:kxU1Jp6pkiKMOLmSDKtSp6Gux9OfJ1qbbW7OMmuZj6k=,tag:0g7ByefASDsB1MH7Eg7O2w==,type:str]
+    webhook_secret: ENC[AES256_GCM,data:27LxEmZvGqHBaY0jyryGN/PKCzV30Ba9Tr85b9ELUOKwbiCOXBd1MA==,iv:Ck8QAfTXGrSCyiyAYhcXZAUJ86tQtcti3c1DBBQx1x8=,tag:tcb5U8Cte0z52+PMviGv1g==,type:str]
+sops:
+    kms:
+    -   arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027
+        created_at: "2022-09-30T03:46:52Z"
+        enc: AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwGZbRqBOGoH9Q+xr/oo6DtjAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMU5UxmEmq5YWUh/slAgEQgDtLciu+4NdV6uHtQMvJSMFdSapvXU+u9vIMOAzDNrUxRHxzPqe0UVZE9fIiuFlGggEQ5EzSBXIR/nKl9A==
+        aws_profile: ""
+    -   arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e
+        created_at: "2022-09-30T03:46:52Z"
+        enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgHc01L7UFdK2dYDnoXeYCfgAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMUzXys7YP5drtoCAwAgEQgDuUy1MBIuq/poDdzZCfEp/zfm1mNoWc4l0W8TKQUVQL+qN0nrWxpRCKNf7dVCH7MOxVJvYbqUrBhlJeTQ==
+        aws_profile: ""
+    gcp_kms: []
+    azure_kv: []
+    hc_vault: []
+    age: []
+    lastmodified: "2022-09-30T03:46:52Z"
+    mac: ENC[AES256_GCM,data:PxZayGmpLwWOGrjZvh1FKpdpUq2feo6WS0xCv7Zb3YEogmqJIWJ0Q6+lOExGk1uauDjYBuCMRUn/++u5Uh5/KNoZJNPzRVkGsuGkMFBNlxcdYzQrmnb6LzgUyIRDaNkZBFbb0nXcLC0FtAhnzv/eb7OTtXUsk49KGCs17Ngr2ow=,iv:1iKouxmFyIP5iCKxnG1uTkJTDZoTH8s76VS9tkFpmp4=,tag:c1O1byeQ3145C66zM3kqvg==,type:str]
+    pgp: []
+    unencrypted_suffix: _unencrypted
+    version: 3.7.3

From cb2ffb4365c53752c68124abc32bf0bddb240c9a Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 21:08:48 -0700
Subject: [PATCH 14/35] tweak heroku.yml

---
 heroku.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/heroku.yml b/heroku.yml
index 0d14b867..d8fa8e42 100644
--- a/heroku.yml
+++ b/heroku.yml
@@ -9,13 +9,13 @@ release:
     - ./scripts.deploy/release.sh
   image: web
 run:
-  # The first line of this command sets PANGEO_FORGE_DEPLOYMENT to itself, if it exists,
-  # but if it doesn't exist, PANGEO_FORGE_DEPLOYMENT is set to the value of HEROKU_APP_NAME.
-  # The latter case occurs only in the review app context. We use this method because review
-  # app names are dynamically generated based on the PR number and are therefore too cumbersome
-  # to set manually for each PR. More on this syntax in: https://stackoverflow.com/a/2013589.
+  # The first line of this command sets PANGEO_FORGE_DEPLOYMENT to HEROKU_APP_NAME, if
+  # HEROKU_APP_NAME exists. The HEROKU_APP_NAME env var only exists in the review app context,
+  # in which app names are dynamically generated. The generated names are deterministic, but
+  # it is cumbersome to set PANGEO_FORGE_DEPLOYMENT manually for each review PR, so this method
+  # is preferable. More on the syntax: https://stackoverflow.com/a/2013589.
   web: >
-    export PANGEO_FORGE_DEPLOYMENT="${PANGEO_FORGE_DEPLOYMENT:=$HEROKU_APP_NAME}"
+    export PANGEO_FORGE_DEPLOYMENT="${HEROKU_APP_NAME:=$PANGEO_FORGE_DEPLOYMENT}"
     && echo "PANGEO_FORGE_DEPLOYMENT set to ${PANGEO_FORGE_DEPLOYMENT}"
    && sops -d -i secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
     && export DATAFLOW_CREDS='./secrets/dataflow-job-submission.json'
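The `${VAR:=default}` expansion used above expands to `$VAR` if it is set and non-empty, and otherwise assigns the default to `VAR` and expands to that. A quick sketch of the behavior the new `web` command relies on (variable values are illustrative):

```console
$ unset HEROKU_APP_NAME; PANGEO_FORGE_DEPLOYMENT="pangeo-forge"
$ echo "${HEROKU_APP_NAME:=$PANGEO_FORGE_DEPLOYMENT}"   # unset, so the fallback wins (and is assigned)
pangeo-forge
$ HEROKU_APP_NAME="pforge-pr-150"
$ echo "${HEROKU_APP_NAME:=$PANGEO_FORGE_DEPLOYMENT}"   # set, so its own value wins
pforge-pr-150
```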
From 297b16f5134d5579cfe77bc9c03175b760a48d66 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 21:48:25 -0700
Subject: [PATCH 15/35] factor logs query into plain funcs

---
 pangeo_forge_orchestrator/routers/logs.py | 61 +++++++++++++++++------
 1 file changed, 45 insertions(+), 16 deletions(-)

diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py
index dc08fc72..7d7571d5 100644
--- a/pangeo_forge_orchestrator/routers/logs.py
+++ b/pangeo_forge_orchestrator/routers/logs.py
@@ -56,6 +56,20 @@ def get_logs(
     return logs
 
 
+def logs_from_recipe_run_id(
+    id: int,
+    db_session: Session,
+    severity: str,
+    limit: int,
+):
+    recipe_run = db_session.exec(
+        select(MODELS["recipe_run"].table).where(MODELS["recipe_run"].table.id == id)
+    ).one()
+    job_id = job_id_from_recipe_run(recipe_run)
+    logs = get_logs(job_id, severity, limit)
+    return logs
+
+
 @logs_router.get(
     "/recipe_runs/{id}/logs",
     summary="Get job logs for a recipe_run, specified by database id.",
@@ -63,16 +77,35 @@
     response_class=PlainTextResponse,
     dependencies=[Depends(check_authentication_header)],
 )
-async def logs_from_recipe_run_id(
+async def raw_logs_from_recipe_run_id(
     id: int,
     *,
     db_session: Session = Depends(get_database_session),
     severity: str = DEFAULT_SEVERITY,
     limit: int = DEFAULT_LIMIT,
 ):
-    recipe_run = db_session.exec(
-        select(MODELS["recipe_run"].table).where(MODELS["recipe_run"].table.id == id)
+    raw_logs = logs_from_recipe_run_id(id, db_session, severity, limit)
+    return raw_logs
+
+
+def logs_from_feedstock_spec_commit_and_recipe_id(
+    feedstock_spec: str,
+    commit: str,
+    recipe_id: str,
+    db_session: Session,
+    severity: str,
+    limit: int,
+):
+    feedstock = db_session.exec(
+        select(MODELS["feedstock"].table).where(MODELS["feedstock"].table.spec == feedstock_spec)
     ).one()
+    statement = (
+        select(MODELS["recipe_run"].table)
+        .where(MODELS["recipe_run"].table.recipe_id == recipe_id)
+        .where(MODELS["recipe_run"].table.head_sha == commit)
+        .where(MODELS["recipe_run"].table.feedstock_id == feedstock.id)
+    )
+    recipe_run = db_session.exec(statement).one()
     job_id = job_id_from_recipe_run(recipe_run)
     logs = get_logs(job_id, severity, limit)
     return logs
@@ -85,7 +118,7 @@ async def logs_from_recipe_run_id(
     response_class=PlainTextResponse,
     dependencies=[Depends(check_authentication_header)],
 )
-async def logs_from_feedstock_spec_commit_and_recipe_id(
+async def raw_logs_from_feedstock_spec_commit_and_recipe_id(
     feedstock_spec: str,
     commit: str,
     recipe_id: str,
@@ -94,16 +127,12 @@ async def logs_from_feedstock_spec_commit_and_recipe_id(
     severity: str = DEFAULT_SEVERITY,
     limit: int = DEFAULT_LIMIT,
 ):
-    feedstock = db_session.exec(
-        select(MODELS["feedstock"].table).where(MODELS["feedstock"].table.spec == feedstock_spec)
-    ).one()
-    statement = (
-        select(MODELS["recipe_run"].table)
-        .where(MODELS["recipe_run"].table.recipe_id == recipe_id)
-        .where(MODELS["recipe_run"].table.head_sha == commit)
-        .where(MODELS["recipe_run"].table.feedstock_id == feedstock.id)
+    raw_logs = logs_from_feedstock_spec_commit_and_recipe_id(
+        feedstock_spec,
+        commit,
+        recipe_id,
+        db_session,
+        severity,
+        limit,
     )
-    recipe_run = db_session.exec(statement).one()
-    job_id = job_id_from_recipe_run(recipe_run)
-    logs = get_logs(job_id, severity, limit)
-    return logs
+    return raw_logs

From b66cff1d5c24eed6e26367ebb58ebdb7d4a28870 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 21:57:17 -0700
Subject: [PATCH 16/35] factor logs routes to return recipe_run as intermediate output

---
 pangeo_forge_orchestrator/routers/logs.py | 30 +++++++++--------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py
index 7d7571d5..dfece3d6 100644
--- a/pangeo_forge_orchestrator/routers/logs.py
+++ b/pangeo_forge_orchestrator/routers/logs.py
@@ -56,18 +56,14 @@ def get_logs(
     return logs
 
 
-def logs_from_recipe_run_id(
+def recipe_run_from_id(
     id: int,
     db_session: Session,
-    severity: str,
-    limit: int,
-):
+) -> SQLModel:
     recipe_run = db_session.exec(
         select(MODELS["recipe_run"].table).where(MODELS["recipe_run"].table.id == id)
     ).one()
-    job_id = job_id_from_recipe_run(recipe_run)
-    logs = get_logs(job_id, severity, limit)
-    return logs
+    return recipe_run
 
 
 @logs_router.get(
@@ -84,18 +80,18 @@ async def raw_logs_from_recipe_run_id(
     severity: str = DEFAULT_SEVERITY,
     limit: int = DEFAULT_LIMIT,
 ):
-    raw_logs = logs_from_recipe_run_id(id, db_session, severity, limit)
+    recipe_run = recipe_run_from_id(id, db_session)
+    job_id = job_id_from_recipe_run(recipe_run)
+    raw_logs = get_logs(job_id, severity, limit)
     return raw_logs
 
 
-def logs_from_feedstock_spec_commit_and_recipe_id(
+def recipe_run_from_feedstock_spec_commit_and_recipe_id(
     feedstock_spec: str,
     commit: str,
     recipe_id: str,
     db_session: Session,
-    severity: str,
-    limit: int,
-):
+) -> SQLModel:
     feedstock = db_session.exec(
         select(MODELS["feedstock"].table).where(MODELS["feedstock"].table.spec == feedstock_spec)
     ).one()
@@ -106,9 +102,7 @@ def logs_from_feedstock_spec_commit_and_recipe_id(
         .where(MODELS["recipe_run"].table.feedstock_id == feedstock.id)
     )
     recipe_run = db_session.exec(statement).one()
-    job_id = job_id_from_recipe_run(recipe_run)
-    logs = get_logs(job_id, severity, limit)
-    return logs
+    return recipe_run
 
 
 @logs_router.get(
@@ -127,12 +121,12 @@ async def raw_logs_from_feedstock_spec_commit_and_recipe_id(
     severity: str = DEFAULT_SEVERITY,
     limit: int = DEFAULT_LIMIT,
 ):
-    raw_logs = logs_from_feedstock_spec_commit_and_recipe_id(
+    recipe_run = recipe_run_from_feedstock_spec_commit_and_recipe_id(
         feedstock_spec,
         commit,
         recipe_id,
         db_session,
-        severity,
-        limit,
     )
+    job_id = job_id_from_recipe_run(recipe_run)
+    raw_logs = get_logs(job_id, severity, limit)
     return raw_logs

From 7391f50a8812eafcacb6ffee119f12078de3c12b Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 22:34:38 -0700
Subject: [PATCH 17/35] add public trace routes WIP

---
 pangeo_forge_orchestrator/routers/logs.py | 58 +++++++++++++++++++++++
 tests/routers/test_logs.py                | 58 ++++++++++++++++++++--
 2 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py
index dfece3d6..c848c1f6 100644
--- a/pangeo_forge_orchestrator/routers/logs.py
+++ b/pangeo_forge_orchestrator/routers/logs.py
@@ -86,6 +86,30 @@ async def raw_logs_from_recipe_run_id(
     return raw_logs
 
 
+@logs_router.get(
+    "/recipe_runs/{id}/logs/trace",
+    summary="Get an error trace for a failed recipe_run, specified by database id.",
+    tags=["recipe_run", "logs", "public"],
+    response_class=PlainTextResponse,
+)
+async def trace_from_recipe_run_id(
+    id: int,
+    *,
+    db_session: Session = Depends(get_database_session),
+):
+    recipe_run = recipe_run_from_id(id, db_session)
+    if recipe_run.status != "completed" or recipe_run.conclusion != "failure":
+        raise HTTPException(
+            status_code=status.HTTP_204_NO_CONTENT,
+            detail=f"{recipe_run = } has either not completed or did not fail.",
+        )
+    job_id = job_id_from_recipe_run(recipe_run)
+    raw_logs = get_logs(job_id, severity="ERROR", limit=1)
+    trace = raw_logs.split("Traceback")[-1]
+    trace = "Traceback" + trace
+    return trace
+
+
 def recipe_run_from_feedstock_spec_commit_and_recipe_id(
     feedstock_spec: str,
     commit: str,
@@ -130,3 +154,37 @@ async def raw_logs_from_feedstock_spec_commit_and_recipe_id(
     job_id = job_id_from_recipe_run(recipe_run)
     raw_logs = get_logs(job_id, severity, limit)
     return raw_logs
+
+
+@logs_router.get(
+    "/feedstocks/{feedstock_spec:path}/{commit}/{recipe_id}/logs/trace",
+    summary=(
+        "Get an error trace for a failed recipe run, "
+        "specified by feedstock_spec, commit, and recipe_id."
+    ),
+    tags=["feedstock", "logs", "public"],
+    response_class=PlainTextResponse,
+)
+async def trace_from_feedstock_spec_commit_and_recipe_id(
+    feedstock_spec: str,
+    commit: str,
+    recipe_id: str,
+    *,
+    db_session: Session = Depends(get_database_session),
+):
+    recipe_run = recipe_run_from_feedstock_spec_commit_and_recipe_id(
+        feedstock_spec,
+        commit,
+        recipe_id,
+        db_session,
+    )
+    if recipe_run.status != "completed" or recipe_run.conclusion != "failure":
+        raise HTTPException(
+            status_code=status.HTTP_204_NO_CONTENT,
+            detail=f"{recipe_run = } has is either not completed or did not fail.",
+        )
+    job_id = job_id_from_recipe_run(recipe_run)
+    raw_logs = get_logs(job_id, severity="ERROR", limit=1)
+    trace = raw_logs.split("Traceback")[-1]
+    trace = "Traceback" + trace
+    return trace
diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py
index 0ace9db7..35aa6cb7 100644
--- a/tests/routers/test_logs.py
+++ b/tests/routers/test_logs.py
@@ -1,5 +1,6 @@
 import json
 import subprocess
+from textwrap import dedent
 from typing import List
 
 import pytest
@@ -95,8 +96,8 @@ async def get_logs_fixture(
         "version": "",
         "started_at": "2022-09-19T16:31:43",
         "completed_at": None,
-        "conclusion": None,
-        "status": "in_progress",
+        "conclusion": request.param["conclusion"],
+        "status": request.param["status"],
         "is_test": True,
         "dataset_type": "zarr",
         "dataset_public_url": None,
@@ -130,6 +131,8 @@ async def get_logs_fixture(
             commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b",
             recipe_id="liveocean",
             gcloud_logging_response="Some logging message from gcloud API.",
+            status="completed",
+            conclusion="failure",
         ),
     ],
     indirect=True,
@@ -154,6 +157,55 @@ def mock_gcloud_logging_call(cmd: List[str]):
     assert response.status_code == 200
     assert response.text == gcloud_logging_response
 
 
+trace_tests_base_params = dict(
+    message='{"job_id": "abc"}',
+    feedstock_spec="pangeo-forge/staged-recipes",
+    commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b",
+    recipe_id="liveocean",
+    gcloud_logging_response=dedent(
+        """
+        Traceback
+         a b c error
+        Traceback
+         e f g error
+        """
+    ),
+)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "get_logs_fixture",
+    [
+        trace_tests_base_params | dict(status="completed", conclusion="failure"),
+        trace_tests_base_params | dict(status="in_progress", conclusion=None),
+        trace_tests_base_params | dict(status="queued", conclusion=None),
+        trace_tests_base_params | dict(status="completed", conclusion="success"),
+    ],
+    indirect=True,
+)
+async def test_get_trace_via_recipe_run_id(
+    mocker,
+    get_logs_fixture,
+    async_app_client,
+):
+    _, gcloud_logging_response, *_ = get_logs_fixture
+
+    def mock_gcloud_logging_call(cmd: List[str]):
+        return gcloud_logging_response
+
+    mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call)
+
+    recipe_run = await async_app_client.get("/recipe_runs/1")
+    trace_response = await async_app_client.get("/recipe_runs/1/logs/trace")
+    if recipe_run.json()["status"] == "completed" and recipe_run.json()["conclusion"] == "failure":
+        assert trace_response.status_code == 200
+        assert trace_response.text == "Traceback\n e f g error\n"
+    else:
+        assert trace_response.status_code == 204
+        assert trace_response.json()["detail"].endswith("has either not completed or did not fail.")
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "get_logs_fixture",
@@ -164,6 +216,8 @@ def mock_gcloud_logging_call(cmd: List[str]):
             commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b",
             recipe_id="liveocean",
             gcloud_logging_response="Some logging message from gcloud API.",
+            status="completed",
+            conclusion="failure",
         ),
     ],
     indirect=True,
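The trace extraction in these routes is plain string surgery: keep everything after the last occurrence of `Traceback` in the raw error logs, then re-attach the delimiter that `str.split` consumes. A standalone sketch with an illustrative log string:

```python
raw_logs = "Traceback\n a b c error\nTraceback\n e f g error\n"

trace = raw_logs.split("Traceback")[-1]  # text after the last "Traceback"
trace = "Traceback" + trace              # re-attach the delimiter split() consumed
assert trace == "Traceback\n e f g error\n"
```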
From 0161d09da8bbf080b118cd8c6726cddabd4632b2 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 22:38:18 -0700
Subject: [PATCH 18/35] finish trace route tests

---
 pangeo_forge_orchestrator/routers/logs.py |  2 +-
 tests/routers/test_logs.py                | 49 ++++++++++++++++++-----
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py
index c848c1f6..b71cd210 100644
--- a/pangeo_forge_orchestrator/routers/logs.py
+++ b/pangeo_forge_orchestrator/routers/logs.py
@@ -181,7 +181,7 @@ async def trace_from_feedstock_spec_commit_and_recipe_id(
     if recipe_run.status != "completed" or recipe_run.conclusion != "failure":
         raise HTTPException(
             status_code=status.HTTP_204_NO_CONTENT,
-            detail=f"{recipe_run = } has is either not completed or did not fail.",
+            detail=f"{recipe_run = } has either not completed or did not fail.",
         )
diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py
index 35aa6cb7..ed5707b6 100644
--- a/tests/routers/test_logs.py
+++ b/tests/routers/test_logs.py
@@ -171,19 +171,16 @@ def mock_gcloud_logging_call(cmd: List[str]):
         """
     ),
 )
+trace_tests_params = [
+    trace_tests_base_params | dict(status="completed", conclusion="failure"),
+    trace_tests_base_params | dict(status="in_progress", conclusion=None),
+    trace_tests_base_params | dict(status="queued", conclusion=None),
+    trace_tests_base_params | dict(status="completed", conclusion="success"),
+]
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "get_logs_fixture",
-    [
-        trace_tests_base_params | dict(status="completed", conclusion="failure"),
-        trace_tests_base_params | dict(status="in_progress", conclusion=None),
-        trace_tests_base_params | dict(status="queued", conclusion=None),
-        trace_tests_base_params | dict(status="completed", conclusion="success"),
-    ],
-    indirect=True,
-)
+@pytest.mark.parametrize("get_logs_fixture", trace_tests_params, indirect=True)
 async def test_get_trace_via_recipe_run_id(
     mocker,
     get_logs_fixture,
@@ -246,3 +243,35 @@ def mock_gcloud_logging_call(cmd: List[str]):
     )
     assert response.status_code == 200
     assert response.text == gcloud_logging_response
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("get_logs_fixture", trace_tests_params, indirect=True)
+async def test_get_trace_human_readable_method(
+    mocker,
+    get_logs_fixture,
+    async_app_client,
+):
+    (
+        _,
+        gcloud_logging_response,
+        feedstock_spec,
+        commit,
+        recipe_id,
+    ) = get_logs_fixture
+
+    def mock_gcloud_logging_call(cmd: List[str]):
+        return gcloud_logging_response
+
+    mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call)
+
+    recipe_run = await async_app_client.get("recipe_runs/1")
+    trace_response = await async_app_client.get(
+        f"/feedstocks/{feedstock_spec}/{commit}/{recipe_id}/logs/trace",
+    )
+    if recipe_run.json()["status"] == "completed" and recipe_run.json()["conclusion"] == "failure":
+        assert trace_response.status_code == 200
+        assert trace_response.text == "Traceback\n e f g error\n"
+    else:
+        assert trace_response.status_code == 204
+        assert trace_response.json()["detail"].endswith("has either not completed or did not fail.")

From 82b1dadd651b64798d40a6dc22a557fce5dc632d Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 22:48:23 -0700
Subject: [PATCH 19/35] different assert for trace tests

---
 tests/routers/test_logs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py
index ed5707b6..269ce9f5 100644
--- a/tests/routers/test_logs.py
+++ b/tests/routers/test_logs.py
@@ -200,7 +200,7 @@ def mock_gcloud_logging_call(cmd: List[str]):
         assert trace_response.text == "Traceback\n e f g error\n"
     else:
         assert trace_response.status_code == 204
-        assert trace_response.json()["detail"].endswith("has either not completed or did not fail.")
+        assert "has either not completed or did not fail." in trace_response.text
@@ -274,4 +274,4 @@ def mock_gcloud_logging_call(cmd: List[str]):
         assert trace_response.text == "Traceback\n e f g error\n"
     else:
         assert trace_response.status_code == 204
-        assert trace_response.json()["detail"].endswith("has either not completed or did not fail.")
+        assert "has either not completed or did not fail." in trace_response.text

From fd3737185e4a2c7b7548b0ff4aab09c658991af7 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 22:54:12 -0700
Subject: [PATCH 20/35] update logs docs

---
 docs/README.md | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/docs/README.md b/docs/README.md
index 4d2bb61e..c9a7fe04 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -660,14 +660,24 @@
 To query logs for recipe runs, see the API docs at the `/docs#/logs` route of the deployment
 (https://api.pangeo-forge.org/docs#/logs for the production deployment).
-These routes are protected, due to the risk of leaking secrets in the logs. The
+
+## Raw logs (admin only)
+
+Routes returning raw logs are protected, due to the risk of leaking secrets. The
 `PANGEO_FORGE_API_KEY` is available to admins in the deployment's secrets config:
 
 ```console
 $ sops -d -i secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
-# cat secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
+$ cat secrets/config.${PANGEO_FORGE_DEPLOYMENT}.yaml
 ```
 
+## Error traces (public)
+
+Logs routes returning (truncated) error traces are public, given the utility of these logs to
+recipe contributors, and the fact that truncation reduces the risk of leaked secrets. Further
+sanitization and/or formatting of these logs could be implemented in the future, to make them
+even more secure, and useful.
+
 # Security
 
 ## Database API

From a484fab88f765e66811d85943d4ff57ec0fb27b3 Mon Sep 17 00:00:00 2001
From: Charles Stern <62192187+cisaacstern@users.noreply.github.com>
Date: Thu, 29 Sep 2022 22:55:56 -0700
Subject: [PATCH 21/35] add note about logs retention

---
 docs/README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/README.md b/docs/README.md
index c9a7fe04..3b5c4a07 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -678,6 +678,12 @@ recipe contributors, and the fact that truncation reduces the risk of leaked sec
 sanitization and/or formatting of these logs could be implemented in the future, to make them
 even more secure, and useful.
 
+## Retention
+
+We should look into the retention policies for Dataflow job logs. It's possible these logs will
+not be queryable after some time has elapsed since the completed recipe run. Initial observations
+suggest the logs may be discarded by default after as short as 24 hours.
+
 # Security
 
 ## Database API
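One wrinkle worth flagging for that investigation: independent of Cloud Logging's retention policy, the `gcloud logging read` command used by `get_logs` applies a default `--freshness` window of one day, so even still-retained entries older than that are skipped unless the flag is passed explicitly (the routes above do not currently set it). Something like the following would widen the window, retention permitting:

```console
$ gcloud logging read 'resource.type="dataflow_step"' --limit=1 --freshness=30d
```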
--- pangeo_forge_orchestrator/routers/github_app.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py index 7f42abd2..f08904ed 100644 --- a/pangeo_forge_orchestrator/routers/github_app.py +++ b/pangeo_forge_orchestrator/routers/github_app.py @@ -620,7 +620,6 @@ async def run( try: out = subprocess.check_output(cmd) logger.debug(f"Command output is {out.decode('utf-8')}") - out = subprocess.check_output(cmd) for line in out.splitlines(): p = json.loads(line) if p["status"] == "submitted": From 22093ed210f1f8dbf14fa257012b581a1990563f Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Fri, 30 Sep 2022 01:05:49 -0700 Subject: [PATCH 24/35] patch for 132 --- .../routers/github_app.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/github_app.py b/pangeo_forge_orchestrator/routers/github_app.py index d8cb229a..e6c14377 100644 --- a/pangeo_forge_orchestrator/routers/github_app.py +++ b/pangeo_forge_orchestrator/routers/github_app.py @@ -622,13 +622,18 @@ async def run( logger.debug(f"Command output is {out.decode('utf-8')}") for line in out.splitlines(): p = json.loads(line) - if p["status"] == "submitted": - existing_message = json.loads(recipe_run.message) if recipe_run.message else {} - recipe_run.message = json.dumps( - existing_message | dict(job_name=p["job_name"], job_id=p["job_id"]) - ) - db_session.add(recipe_run) - db_session.commit() + if ( + "status" in p + ): # patch for https://github.com/pangeo-forge/pangeo-forge-orchestrator/issues/132 + if p["status"] == "submitted": + existing_message = ( + json.loads(recipe_run.message) if recipe_run.message else {} + ) + recipe_run.message = json.dumps( + existing_message | dict(job_name=p["job_name"], job_id=p["job_id"]) + ) + db_session.add(recipe_run) + db_session.commit() except subprocess.CalledProcessError as e: for line in e.output.splitlines(): p = json.loads(line) From ccb545bcd9a3c8da437712f32e98afeda2b6f770 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 12:55:55 -0700 Subject: [PATCH 25/35] add fetch_logs.py executable --- .../bakeries/dataflow/fetch_logs.py | 215 ++++++++++++++++++ pangeo_forge_orchestrator/routers/logs.py | 2 + setup.cfg | 2 +- 3 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py diff --git a/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py b/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py new file mode 100644 index 00000000..c37038ba --- /dev/null +++ b/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +""" +Adapted from https://github.com/yuvipanda/mybinder-analytics/blob/main/logs.py + +Note: this should be moved in pangeo-forge-runner, xref: + https://github.com/pangeo-forge/pangeo-forge-runner/issues/20 + +Putting it here for now because that seems like a more efficient path to getting +this essential functionality deployed. +""" +import argparse +import asyncio +import json +import subprocess +import sys +import time +import typing +from datetime import datetime +from typing import Union + +from dateutil.parser import isoparse, parse as dateparse # type: ignore + + +class LogRecord(typing.NamedTuple): + """ + Structured log entry coming out of dataflow. 
+ """ + + timestamp: datetime + source: str + message: str + instance: str + + def __str__(self): + out = f"[{self.timestamp.isoformat()}] [{self.source}" + if self.instance: + out += f":{self.instance}" + out += f"] {self.message}" + return out + + +class DataFlowJob(typing.NamedTuple): + """ + Representation of a data flow job + """ + + id: str + creation_time: datetime + location: str + name: str + state: str + state_time: datetime + type: str + + +async def get_job(name: str) -> Union[DataFlowJob, None]: + """ + Return job information for job with given dataflow name. + + Returns None if no such job is present + """ + cmd = [ + "gcloud", + "dataflow", + "jobs", + "list", + f"--filter=name={name}", + "--format=json", + ] + proc = await asyncio.subprocess.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE) + stdout, _ = await proc.communicate() + jobs = json.loads(stdout.decode()) + if jobs: + job = jobs[0] + return DataFlowJob( + id=job["id"], + creation_time=dateparse(job["creationTime"]), + location=job["location"], + name=job["name"], + state=job["state"], + state_time=dateparse(job["stateTime"]), + type=job["type"], + ) + return None + + +async def get_job_logs( + job_id, + source_filter: typing.List, + instance_filter: typing.List, + since: datetime = None, +): + """ + Get logs for given job from given project. + + job_id: + ID (not name) of the dataflow job we are looking for logs of + source_filter: + list of sources we want logs from. Allowed options are: + + - job-message: Messages from dataflow itself + - docker: Logs from the docker containers running our workers in dataflow + - system: Logs from the VMs running our workers + - worker: Logs from the workers themselves, sometimes overlaps with messages from docker + - kubelet: Logs from the kubelet on the VMs, responsible for starting up containers + - agent: Not sure? + - harness-startup: Not sure? + - harness: Not sure? + - shuffler-startup: Not sure? + - shuffler: Not sure? + - vm-health: Not sure? + - vm-monitor: Not sure? + - resource: Not sure? + - insights: Not sure? 
+ Any combination of these can be provided, and logs from all these sources will be returned + instance_filter: + list of instance (VM) names we want logs from + since: + Show logs only after this timestamp + """ + query = [f'resource.labels.job_id="{job_id}"'] + if source_filter: + query.append( + "(" + + " OR ".join(f'log_id("dataflow.googleapis.com/{sf}")' for sf in source_filter) + + ")" + ) + + if instance_filter: + query.append('labels."compute.googleapis.com/resource_type"="instance"') + query.append( + 'labels."compute.googleapis.com/resource_name" = (' + " OR ".join(instance_filter) + ")" + ) + if since: + query.append(f'timestamp>"{since.isoformat()}"') + cmd = [ + "gcloud", + "logging", + "read", + "\n".join(query), + "--format=json", + "--order=asc", + ] + proc = await asyncio.subprocess.create_subprocess_exec(*cmd, stdout=subprocess.PIPE) + stdout, _ = await proc.communicate() + logs = json.loads(stdout.decode()) + + for logline in logs: # type: ignore + timestamp = isoparse(logline["timestamp"]) + + # logType looks like projects//logs/dataflow.googleapis.com%2F + # And type is what we ultimately care about + source = logline["logName"].rsplit("%2F", 1)[-1] + + # Each log type should be handled differently + if source in ( + "kubelet", + "shuffler", + "harness", + "harness-startup", + "vm-health", + "vm-monitor", + "resource", + "agent", + "docker", + "system", + "shuffler-startup", + "worker", + ): + message = logline["jsonPayload"]["message"] + elif source in ("job-message"): + message = logline["textPayload"] + elif source in ("insights",): + # Let's ignore these + continue + else: + print(source) + print(logline) + sys.exit(1) + if logline["labels"].get("compute.googleapis.com/resource_type") == "instance": + instance = logline["labels"]["compute.googleapis.com/resource_name"] + else: + instance = None + # Trim additional newlines to prevent excess blank lines + yield LogRecord(timestamp, source, message.rstrip(), instance) + + +async def main(): + argparser = argparse.ArgumentParser() + + argparser.add_argument("name") + + argparser.add_argument("--source", action="append") + argparser.add_argument("--instance", action="append") + argparser.add_argument("--follow", "-f", action="store_true") + + args = argparser.parse_args() + + job = await get_job(args.name) + last_ts = None + + while True: + newest_ts = None + async for log in get_job_logs(job.id, args.source, args.instance, last_ts): + newest_ts = log.timestamp + print(log) + if not args.follow: + break + if last_ts is None and newest_ts is not None: + last_ts = newest_ts + + time.sleep(5) + + +asyncio.run(main()) diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py index b71cd210..39f931fb 100644 --- a/pangeo_forge_orchestrator/routers/logs.py +++ b/pangeo_forge_orchestrator/routers/logs.py @@ -52,6 +52,8 @@ def get_logs( if limit: cmd += [f"--limit={limit}"] + # cmd = f"python3 ../bakeries/dataflow/fetch_logs.py {job_name} --source={source}" + logs = subprocess.check_output(cmd) return logs diff --git a/setup.cfg b/setup.cfg index 158395e4..b27f4e2a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -63,7 +63,7 @@ extend-ignore = E203,E501,E402,W605,W503 [isort] known_first_party=pangeo_forge_orchestrator -known_third_party=aiohttp,alembic,asgi_lifespan,cryptography,fastapi,gidgethub,httpx,jwt,pydantic,pytest,pytest_asyncio,pytest_lazyfixture,requests,setuptools,sqlalchemy,sqlmodel,starlette,yaml 
+known_third_party=aiohttp,alembic,asgi_lifespan,cryptography,dateutil,fastapi,gidgethub,httpx,jwt,pydantic,pytest,pytest_asyncio,pytest_lazyfixture,requests,setuptools,sqlalchemy,sqlmodel,starlette,yaml multi_line_output=3 include_trailing_comma=True force_grid_wrap=0 From 2cd67c58a1aaa0336209386950c929a31e717a99 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 13:13:05 -0700 Subject: [PATCH 26/35] dump dataflow logs to json --- .../bakeries/dataflow/fetch_logs.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py b/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py index c37038ba..fe699bb3 100644 --- a/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py +++ b/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py @@ -38,6 +38,14 @@ def __str__(self): out += f"] {self.message}" return out + def asdict(self): + return { + "timestamp": self.timestamp.isoformat(), + "source": self.source, + "instance": self.instance, + "message": self.message, + } + class DataFlowJob(typing.NamedTuple): """ @@ -199,11 +207,12 @@ async def main(): job = await get_job(args.name) last_ts = None + logs_array = [] while True: newest_ts = None async for log in get_job_logs(job.id, args.source, args.instance, last_ts): newest_ts = log.timestamp - print(log) + logs_array.append(log.asdict()) if not args.follow: break if last_ts is None and newest_ts is not None: @@ -211,5 +220,7 @@ async def main(): time.sleep(5) + print(json.dumps(logs_array)) + asyncio.run(main()) From cbe42a7e02eeffa1211a419e788cda8c13509a14 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 13:17:27 -0700 Subject: [PATCH 27/35] comment out time.sleep --- pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py b/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py index fe699bb3..617853b7 100644 --- a/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py +++ b/pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py @@ -13,7 +13,6 @@ import json import subprocess import sys -import time import typing from datetime import datetime from typing import Union @@ -218,7 +217,10 @@ async def main(): if last_ts is None and newest_ts is not None: last_ts = newest_ts - time.sleep(5) + # Commenting this out, because we only need it for `--follow`, which we do not currently + # use. Leaving it here, rather than deleting, because we may want to use `--follow` + # eventually, in which case we would bring this back. 
+ # time.sleep(5) print(json.dumps(logs_array)) From fb22a877abf4fd47853a5a58e94de74d79667e66 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 15:38:07 -0700 Subject: [PATCH 28/35] check for secrets in logs, rewrite logs router WIP --- pangeo_forge_orchestrator/routers/logs.py | 155 ++++++++++------------ 1 file changed, 69 insertions(+), 86 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py index 39f931fb..89b48a03 100644 --- a/pangeo_forge_orchestrator/routers/logs.py +++ b/pangeo_forge_orchestrator/routers/logs.py @@ -1,16 +1,28 @@ import json import subprocess +import tempfile +from typing import List from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.responses import PlainTextResponse +from pydantic import BaseModel, SecretStr from sqlmodel import Session, SQLModel, select from starlette import status +from ..config import get_config from ..dependencies import check_authentication_header, get_session as get_database_session from ..models import MODELS logs_router = APIRouter() +DEFAULT_SOURCE = Query( + "worker", + description=( + "A valid dataflow logging source. Must be one of: ['kubelet', 'shuffler', 'harness', " + "'harness-startup', 'vm-health', 'vm-monitor', 'resource', 'agent', 'docker', 'system', " + "'shuffler-startup', 'worker']" + ), +) DEFAULT_SEVERITY = Query( "ERROR", description=( @@ -21,40 +33,67 @@ DEFAULT_LIMIT = Query(1, description="Max number of log entries to return.") -def job_id_from_recipe_run(recipe_run: SQLModel) -> str: +def job_name_from_recipe_run(recipe_run: SQLModel) -> str: try: - job_id = json.loads(recipe_run.message)["job_id"] + job_name = json.loads(recipe_run.message)["job_name"] except (KeyError, json.JSONDecodeError) as e: detail = ( - f"Message field of {recipe_run = } missing 'job_id'." + f"Message field of {recipe_run = } missing 'job_name'." if type(e) == KeyError else f"Message field of {recipe_run = } not JSON decodable." 
) raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=detail) - return job_id + return job_name + + +def secret_str_vals_from_basemodel(obj: BaseModel) -> List[str]: + """From a pydantic BaseModel, recursively surface all fields with type SecretStr.""" + + def is_pydantic_model(obj): + return isinstance(obj, BaseModel) + + secret_str_vals = [] + if is_pydantic_model(obj): + for var in vars(obj): + if is_pydantic_model(getattr(obj, var)): + secret_str_vals_from_basemodel(getattr(obj, var)) + elif isinstance(getattr(obj, var), SecretStr): + secret_str_vals.append(json.loads(obj.json())[var]) + + return secret_str_vals def get_logs( - job_id: str, - severity: str, - limit: int, + job_name: str, + # TODO: add param `severity: str,` + source: str, + recipe_run: SQLModel, + db_session: Session, ): - log_name_prefix = "projects/pangeo-forge-4967/logs/dataflow.googleapis.com" - query = ( - 'resource.type="dataflow_step" ' - f'AND resource.labels.job_id="{job_id}" ' - f'AND logName=("{log_name_prefix}%2Fjob-message" OR "{log_name_prefix}%2Flauncher") ' - f'AND severity="{severity}"' - ) - cmd = "gcloud logging read".split() + [query] - - if limit: - cmd += [f"--limit={limit}"] + cmd = f"python3 ../bakeries/dataflow/fetch_logs.py {job_name} --source={source}".split() + logs = subprocess.check_output(cmd).decode("utf-8") - # cmd = f"python3 ../bakeries/dataflow/fetch_logs.py {job_name} --source={source}" + # First security check: ensure known bakery secrets do not appear in logs + statement = select(MODELS["bakery"].table).where( + MODELS["bakery"].table.id == recipe_run.bakery_id + ) + bakery = db_session.exec(statement).one() + bakery_config = get_config().bakeries[bakery.name] + bakery_secrets = secret_str_vals_from_basemodel(bakery_config) + for secret in bakery_secrets: + if secret in logs: + raise ValueError("Bakery secret detected in logs.") + + # Second security check: gitleaks + with tempfile.TemporaryDirectory() as tmpdir: + with open(f"{tmpdir}/c.json", mode="w") as f: + json.dump(logs, f) + gitleaks_cmd = "gitleaks detect --no-git".split() + gitleaks = subprocess.run(gitleaks_cmd, cwd=tmpdir) + if not gitleaks.returncode == 0: + raise ValueError("Gitleaks detected secrets in the logs.") - logs = subprocess.check_output(cmd) return logs @@ -79,39 +118,16 @@ async def raw_logs_from_recipe_run_id( id: int, *, db_session: Session = Depends(get_database_session), - severity: str = DEFAULT_SEVERITY, - limit: int = DEFAULT_LIMIT, + source: str = DEFAULT_SOURCE, + # severity: str = DEFAULT_SEVERITY, + # limit: int = DEFAULT_LIMIT, ): recipe_run = recipe_run_from_id(id, db_session) - job_id = job_id_from_recipe_run(recipe_run) - raw_logs = get_logs(job_id, severity, limit) + job_name = job_name_from_recipe_run(recipe_run) + raw_logs = get_logs(job_name, source, recipe_run, db_session) return raw_logs -@logs_router.get( - "/recipe_runs/{id}/logs/trace", - summary="Get an error trace for a failed recipe_run, specified by database id.", - tags=["recipe_run", "logs", "public"], - response_class=PlainTextResponse, -) -async def trace_from_recipe_run_id( - id: int, - *, - db_session: Session = Depends(get_database_session), -): - recipe_run = recipe_run_from_id(id, db_session) - if recipe_run.status != "completed" or recipe_run.conclusion != "failure": - raise HTTPException( - status_code=status.HTTP_204_NO_CONTENT, - detail=f"{recipe_run = } has either not completed or did not fail.", - ) - job_id = job_id_from_recipe_run(recipe_run) - raw_logs = get_logs(job_id, severity="ERROR", 
limit=1) - trace = raw_logs.split("Traceback")[-1] - trace = "Traceback" + trace - return trace - - def recipe_run_from_feedstock_spec_commit_and_recipe_id( feedstock_spec: str, commit: str, @@ -144,8 +160,9 @@ async def raw_logs_from_feedstock_spec_commit_and_recipe_id( recipe_id: str, *, db_session: Session = Depends(get_database_session), - severity: str = DEFAULT_SEVERITY, - limit: int = DEFAULT_LIMIT, + source: str = DEFAULT_SOURCE, + # severity: str = DEFAULT_SEVERITY, + # limit: int = DEFAULT_LIMIT, ): recipe_run = recipe_run_from_feedstock_spec_commit_and_recipe_id( feedstock_spec, @@ -153,40 +170,6 @@ async def raw_logs_from_feedstock_spec_commit_and_recipe_id( recipe_id, db_session, ) - job_id = job_id_from_recipe_run(recipe_run) - raw_logs = get_logs(job_id, severity, limit) + job_name = job_name_from_recipe_run(recipe_run) + raw_logs = get_logs(job_name, source, recipe_run, db_session) return raw_logs - - -@logs_router.get( - "/feedstocks/{feedstock_spec:path}/{commit}/{recipe_id}/logs/trace", - summary=( - "Get an error trace a failed recipe run, " - "specified by feedstock_spec, commit, and recipe_id." - ), - tags=["feedstock", "logs", "public"], - response_class=PlainTextResponse, -) -async def trace_from_feedstock_spec_commit_and_recipe_id( - feedstock_spec: str, - commit: str, - recipe_id: str, - *, - db_session: Session = Depends(get_database_session), -): - recipe_run = recipe_run_from_feedstock_spec_commit_and_recipe_id( - feedstock_spec, - commit, - recipe_id, - db_session, - ) - if recipe_run.status != "completed" or recipe_run.conclusion != "failure": - raise HTTPException( - status_code=status.HTTP_204_NO_CONTENT, - detail=f"{recipe_run = } has either not completed or did not fail.", - ) - job_id = job_id_from_recipe_run(recipe_run) - raw_logs = get_logs(job_id, severity="ERROR", limit=1) - trace = raw_logs.split("Traceback")[-1] - trace = "Traceback" + trace - return trace From f26949af3b82189e0ec68901c9b43c16b4cecf5e Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 15:42:27 -0700 Subject: [PATCH 29/35] log router tests rewrite WIP --- tests/routers/test_logs.py | 103 +++++-------------------------------- 1 file changed, 13 insertions(+), 90 deletions(-) diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index c2c4b1c4..eda666b4 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -1,6 +1,5 @@ import json import subprocess -from textwrap import dedent from typing import List import pytest @@ -8,7 +7,7 @@ from fastapi import HTTPException from pangeo_forge_orchestrator.models import MODELS -from pangeo_forge_orchestrator.routers.logs import get_logs, job_id_from_recipe_run +from pangeo_forge_orchestrator.routers.logs import get_logs, job_name_from_recipe_run from ..conftest import clear_database @@ -16,12 +15,12 @@ @pytest.mark.parametrize( "message, expected_error", [ - ('{"job_id": "123"}', None), - ('{"job_name": "123"}', KeyError), - ('"job_id": "123"', json.JSONDecodeError), + ('{"job_name": "123"}', None), + ('{"job_id": "123"}', KeyError), + ('"job_name": "123"', json.JSONDecodeError), ], ) -def test_job_id_from_recipe_run(message, expected_error): +def test_job_name_from_recipe_run(message, expected_error): recipe_run_kws = { "recipe_id": "liveocean", "bakery_id": 1, @@ -40,11 +39,11 @@ def test_job_id_from_recipe_run(message, expected_error): } recipe_run = MODELS["recipe_run"].table(**recipe_run_kws) if not expected_error: - job_id = 
job_id_from_recipe_run(recipe_run) - assert job_id == json.loads(message)["job_id"] + job_name = job_name_from_recipe_run(recipe_run) + assert job_name == json.loads(message)["job_name"] else: with pytest.raises(HTTPException): - job_id_from_recipe_run(recipe_run) + job_name_from_recipe_run(recipe_run) @pytest.mark.parametrize( @@ -58,9 +57,9 @@ def mock_gcloud_logging_call(cmd: List[str]): mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) logs = get_logs( - job_id="2022-09-29_11_31_40-14379398480910960453", - severity="ERROR", - limit=1, + job_name="a9g8f8d7sa", + # severity="ERROR", + # limit=1, ) assert logs == gcloud_logging_response @@ -126,7 +125,7 @@ async def get_logs_fixture( "get_logs_fixture", [ dict( - message='{"job_id": "abc"}', + message='{"job_name": "abc"}', feedstock_spec="pangeo-forge/staged-recipes", commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", recipe_id="liveocean", @@ -157,57 +156,12 @@ def mock_gcloud_logging_call(cmd: List[str]): assert response.text == gcloud_logging_response -trace_tests_base_params = dict( - message='{"job_id": "abc"}', - feedstock_spec="pangeo-forge/staged-recipes", - commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", - recipe_id="liveocean", - gcloud_logging_response=dedent( - """ - Traceback - a b c error - Traceback - e f g error - """ - ), -) -trace_tests_params = [ - trace_tests_base_params | dict(status="completed", conclusion="failure"), - trace_tests_base_params | dict(status="in_progress", conclusion=None), - trace_tests_base_params | dict(status="queued", conclusion=None), - trace_tests_base_params | dict(status="completed", conclusion="success"), -] - - -@pytest.mark.asyncio -@pytest.mark.parametrize("get_logs_fixture", trace_tests_params, indirect=True) -async def test_get_trace_via_recipe_run_id( - mocker, - get_logs_fixture, - async_app_client, -): - _, gcloud_logging_response, *_ = get_logs_fixture - - def mock_gcloud_logging_call(cmd: List[str]): - return gcloud_logging_response - - mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) - - recipe_run = await async_app_client.get("/recipe_runs/1") - trace_response = await async_app_client.get("/recipe_runs/1/logs/trace") - if recipe_run.json()["status"] == "completed" and recipe_run.json()["conclusion"] == "failure": - assert trace_response.status_code == 200 - assert trace_response.text == "Traceback\n e f g error\n" - else: - assert trace_response.status_code == 204 - - @pytest.mark.asyncio @pytest.mark.parametrize( "get_logs_fixture", [ dict( - message='{"job_id": "abc"}', + message='{"job_name": "abc"}', feedstock_spec="pangeo-forge/staged-recipes", commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", recipe_id="liveocean", @@ -242,34 +196,3 @@ def mock_gcloud_logging_call(cmd: List[str]): ) assert response.status_code == 200 assert response.text == gcloud_logging_response - - -@pytest.mark.asyncio -@pytest.mark.parametrize("get_logs_fixture", trace_tests_params, indirect=True) -async def test_get_trace_human_readable_method( - mocker, - get_logs_fixture, - async_app_client, -): - ( - _, - gcloud_logging_response, - feedstock_spec, - commit, - recipe_id, - ) = get_logs_fixture - - def mock_gcloud_logging_call(cmd: List[str]): - return gcloud_logging_response - - mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) - - recipe_run = await async_app_client.get("recipe_runs/1") - trace_response = await async_app_client.get( - f"/feedstocks/{feedstock_spec}/{commit}/{recipe_id}/logs/trace", - ) - if 
recipe_run.json()["status"] == "completed" and recipe_run.json()["conclusion"] == "failure": - assert trace_response.status_code == 200 - assert trace_response.text == "Traceback\n e f g error\n" - else: - assert trace_response.status_code == 204 From 6be769189f64a2820feb8fe765df628e8d766793 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 16:06:53 -0700 Subject: [PATCH 30/35] get logs router tests to pass --- pangeo_forge_orchestrator/routers/logs.py | 4 +- tests/routers/test_logs.py | 71 +++++++++++++++++------ 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py index 89b48a03..8e836ba0 100644 --- a/pangeo_forge_orchestrator/routers/logs.py +++ b/pangeo_forge_orchestrator/routers/logs.py @@ -72,7 +72,7 @@ def get_logs( db_session: Session, ): cmd = f"python3 ../bakeries/dataflow/fetch_logs.py {job_name} --source={source}".split() - logs = subprocess.check_output(cmd).decode("utf-8") + logs = subprocess.check_output(cmd) # First security check: ensure known bakery secrets do not appear in logs statement = select(MODELS["bakery"].table).where( @@ -82,7 +82,7 @@ def get_logs( bakery_config = get_config().bakeries[bakery.name] bakery_secrets = secret_str_vals_from_basemodel(bakery_config) for secret in bakery_secrets: - if secret in logs: + if secret in str(logs): raise ValueError("Bakery secret detected in logs.") # Second security check: gitleaks diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index eda666b4..62c540ed 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -5,7 +5,9 @@ import pytest import pytest_asyncio from fastapi import HTTPException +from sqlmodel import Session +from pangeo_forge_orchestrator.database import engine from pangeo_forge_orchestrator.models import MODELS from pangeo_forge_orchestrator.routers.logs import get_logs, job_name_from_recipe_run @@ -46,24 +48,6 @@ def test_job_name_from_recipe_run(message, expected_error): job_name_from_recipe_run(recipe_run) -@pytest.mark.parametrize( - "gcloud_logging_response", - ["Some logs returned by gcloud logging API"], -) -def test_get_logs(mocker, gcloud_logging_response): - def mock_gcloud_logging_call(cmd: List[str]): - return gcloud_logging_response - - mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) - - logs = get_logs( - job_name="a9g8f8d7sa", - # severity="ERROR", - # limit=1, - ) - assert logs == gcloud_logging_response - - @pytest_asyncio.fixture async def get_logs_fixture( api_key, @@ -120,6 +104,57 @@ async def get_logs_fixture( clear_database() +@pytest.mark.asyncio +@pytest.mark.parametrize( + "get_logs_fixture", + [ + dict( + message='{"job_name": "abc"}', + feedstock_spec="pangeo-forge/staged-recipes", + commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", + recipe_id="liveocean", + gcloud_logging_response="Some logging message from gcloud API.", + status="completed", + conclusion="failure", + ), + ], + indirect=True, +) +async def test_get_logs(mocker, get_logs_fixture, async_app_client): + + ( + _, + gcloud_logging_response, + _, + _, + _, + ) = get_logs_fixture + + def mock_fetch_logs_call(cmd: List[str]): + return gcloud_logging_response + + mocker.patch.object(subprocess, "check_output", mock_fetch_logs_call) + + recipe_run_response = await async_app_client.get("/recipe_runs/1") + recipe_run_kws = { + # drop extended response fields + k: v + for k, v in 
recipe_run_response.json().items() + if k not in ["bakery", "feedstock"] + } + recipe_run = MODELS["recipe_run"].table(**recipe_run_kws) + with Session(engine) as db_session: + logs = get_logs( + job_name=json.loads(recipe_run.message)["job_name"], + source="worker", + recipe_run=recipe_run, + db_session=db_session, + # severity="ERROR", + # limit=1, + ) + assert logs == gcloud_logging_response + + @pytest.mark.asyncio @pytest.mark.parametrize( "get_logs_fixture", From fdb634fd3ac806d486ed8c273ccda4223053321c Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 16:09:00 -0700 Subject: [PATCH 31/35] setup config for pr 150 review app --- ...geo-ldeo-nsf-earthcube.pforge-pr-150.yaml} | 0 secrets/config.pforge-pr-157.yaml | 26 ------------------- 2 files changed, 26 deletions(-) rename bakeries/{pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml => pangeo-ldeo-nsf-earthcube.pforge-pr-150.yaml} (100%) delete mode 100644 secrets/config.pforge-pr-157.yaml diff --git a/bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml b/bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-150.yaml similarity index 100% rename from bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-157.yaml rename to bakeries/pangeo-ldeo-nsf-earthcube.pforge-pr-150.yaml diff --git a/secrets/config.pforge-pr-157.yaml b/secrets/config.pforge-pr-157.yaml deleted file mode 100644 index 7084d0d8..00000000 --- a/secrets/config.pforge-pr-157.yaml +++ /dev/null @@ -1,26 +0,0 @@ -fastapi: - PANGEO_FORGE_API_KEY: ENC[AES256_GCM,data:MlO5jf+F9maWgxsaIIyUQboOq1uuhFn+6xgS1O/+Fsc=,iv:MY7iXKpKPKrd1xPiSKirSgryvjTKTUISXw7Y0nk2CEQ=,tag:vu3GUt2uI59Q0O9dK2PX1A==,type:str] -github_app: - app_name: ENC[AES256_GCM,data:B33ymW2r5KYsYHk8eg==,iv:8+1eEKt4DoIlpzTZuIqLxFQy1ybIPMn1v+2bI3UjtxU=,tag:MFXVjUpokCkSbNhQEi4Chg==,type:str] - id: ENC[AES256_GCM,data:W1QhkzVO,iv:JnVPFQ4tEDsfdGJjHEqKIxOSSYe5HcP7fVKNeQUUs3A=,tag:zc5YBn+kTjB4Gi9BXlod4g==,type:int] - private_key: 
ENC[AES256_GCM,data:hZEGGOm41u3NOeK1THdcmcOebvNx6Sw488+xPYVGg4ga/4Ea9+ptd/LSWINdm3NxRTlFjTPv4yy1KhqWO5a/xdEl8kGh0vRMQrE53ODHheHQDs4iu7YUMPFQhr4NzPsVRE0z6oGPJWo5AAvoL4jb5vdhSSfROp/eI/9JIrrtlm2ir1JowzlxsCn6FXo1VB+sj98/r+0y+zApJYc7hoz2kb5hPQiysTPsz9lHv2uyKv1/RcaTc8+mt0NR9vj14sE4rrMh33ltenSRhPXSaWcZzHS8xQofIg6obR3O84PI6yRvQAu9Dsgsz8dieaZCxfI1METMp9TyeN0TZBKllczQYtkXQuE7CBOdyRGb00ehvznNRAGinrdfEt+5OaKYxsuaBbYasELFs2/veLJ4gDx3yisje94GMxb/3jkW20ko66ztQYaEt2hgy2HMCvXqvhRYPghE3wHjS4Nilmnn53WwhSRjam/UvYguqdFJtFgUijFeqWV04sk7fWTjpjDrG9Sqk083IkxLr1lsZ14lkEcyPv3afAcqLdFmFUz/Gmt38WwedBCnJ1cICWN0LyTkqRmxRS3uEgRGzcrjonMaPpeHp+DdXpb5V+2BGdIvRNXgPbil1OJ8dYHEJEO8E3U1aFrH29O1k4/Mbzgy7wtK1nplBkVQfDCRN7kJksi/ANE+2VXDoLxGVmwbuU42A1efZmaRpyRTYz0IQvbhtCB3EJhDklphQZ0y1Zjw3EEWPo9aUcQHPRiarZzEBMPve1Mx52xTIqHeOy+9aht/vuda2My1/Kki5TK5V8Yl5gs/UN/atSkJ2aph57QKNdWbdphbfv+vkLzVpmjPrP9bKltPA1lhnhpcaxnLZgS0Cp42lv6yKJfFL+GLeMvMNRBKSSo9ff17iKkvbywYDgBubfTrfsHTw/KnwED9WDornL+TdEdBhBba7oTmBGurxohFXz6GH3HPtBUc9kQTX94yyLSrAnG1HCZ4etZDVqzbjPIAvfu55o0oDV1zmHJAuOZyd+0P1UQPv3Uiwq1poCiULT8ZQCWyEvYVatHu/euRpNdDR0ESVZSHMFGiViejz5spl8r6p0iN2Ot1KGD3fa4mESluyxADWLbE+g8RRK8O1wjaXFCTpH/I7TtY2XH5LtmZSzf11tqERGzaHsOPLC97QSOwfULnEqeA2302nGEeM/gUZITS8FCh7shdEW10v+hYQ6DYtA7xhb+olSTvqOGptFZREYWCawWE2HUn9Qo9DVP8vnYycPLxwgb4harlKX0EIegm9eGDzbbP02b11GX6bUXkEAcsAW8cI52S7IHU9wzVWbkVaMVID49d5YjRYqr6VzX6DPI3ywf9bvOL9DkX/maFLHX/A1zCntYuxCnuSi+CK2s+mX9nYVj7DwsZeE7JtyxSY+dD/WkYxuzkokGsVXVARcTfPbMtEoam6QHzunBEfIQ7/suqdG1PqkW6qL8r+ATcIGUDU07V2sA4Mlr/DdUnKSMeYW5XgHTVUUVPEWBsHDtrk14HQLNR9VtqLziGQ/nt8mIurcmJM3PwcIr3gkn5FA8hepe9vltCdQ3A7tayYhTW50F200h8Z16KjTpHj/Ryo4sQnDBu/PHeI1kOqRZxXBT2Z58nj/yTEPtPY2TKUyo+xDb6szm8LbI9a3I936ia/CiE7yXyvGHx779aT1BNMZszmmC08PSqnffm6gVgUYf/bQqWOPcByUhvYWmo3J3cltMm2FyS8EPqHxWW7Ty0inkLrPupPwjhNA/z+IWSCbsNEPDMFzc4giEk4dsH1fs01nTd4IMN6P/YL5SMxiPNZ+Nj8/NdKOhxDJxRbQywDl5nVFl9N8CgnDjA/X5aYBi7rR9YxcWPHdp9morX9/PZH3fYrtdFTIXQQinBSli/dfrezM0J14d3RFTK0nlJnISL4kIr2EgsE0Wm+REn7cMRkuzvjbREk7ui4UscJ9vPwhC4+oJhFYHEcA1yOaE7B+VnfGcb3Y9j+j8nggcqTB0RhgrgBVnTH2cg84flHJqtsv2AiPV5U+EtEpOZW8/6Hs6eDJjMOX70E4V2BADPj26/iGTRcjZgPXJwFLwox10jQeMass6xRCu4ubk3UlvPzNhZ7s+jZy9vLN4yjG87TbEUkc2/LLHsKG2PmbI+43igRI09QkKTxX01uSXBsUFtkeIuGV4BXJ0flATCsTSAdIBZuyDlgs6XV/U+fMKgkK+a4offxBRlyuOecEvUtNCuJspXVeM=,iv:pwdazy/432hwXNu4CIuuCX6XaswQDbjUR78Tcxluec8=,tag:Px5FRLUGuEAekC74dezJdg==,type:str] - webhook_secret: ENC[AES256_GCM,data:XDJoW74WAEvKIpOjggjzMPkhlZklilf2YIlvul1KK7eDZUnAiJ1B0w==,iv:z/QZI8WcIvHBXnbE6kyp3vIfFiJYSR+NMI1Xm4sTbBQ=,tag:ukuYWiur+NlHJTSpax+NvA==,type:str] -sops: - kms: - - arn: arn:aws:kms:us-east-1:256317687910:key/d8b153c3-20a9-4364-a553-94405d4c1027 - created_at: "2022-10-03T16:05:31Z" - enc: AQICAHgpH4G+b2ULBcvMucaHVvQi3QdX1B0xlVvF3iFfhxUVOwEOVvZ+Bkb8BSnkr/2qDnaMAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMo1r1NiVj8DkbjwX+AgEQgDsXHd10jxFx9KZfHFWDPONRHFP6gcDLytRzixqaOasN4pbyxNgxbXtTRgqYQyZV5UtSanQgwSMSZ9VtMw== - aws_profile: "" - - arn: arn:aws:kms:us-west-1:256317687910:key/0f31de65-bcc9-4fef-b9f5-67ce086f532e - created_at: "2022-10-03T16:05:31Z" - enc: AQICAHiYQfpCkhiXnfU8apZYPITKv6caFhMAr7Hx04ufaTWvvgHw+nXyjHtlMsZ6cHd+RSWBAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMkwCxlaOjVIdX3xb7AgEQgDtL1O5koHLP+nlxhd13r27fsZB8Co9sO4x9KmSY6+N39urR3UUp7TV5nOS7D6Uu6FfIPhW4mQNB74Mqsg== - aws_profile: "" - gcp_kms: [] - azure_kv: [] - hc_vault: [] - age: [] - lastmodified: "2022-10-03T16:05:31Z" - mac: 
ENC[AES256_GCM,data:PLgAP7yVGaq7IXmXxOplwcrLgtEoocmWesFOrqkr+7pXgtZWWr9/hUJNLwBsEz2fD5ma4KZuuFY02ateh2ae3uAgIigoLciIIki5lZ7ijxk+3C0NBaWFIIubuUkKkyNwqLLqzS9kp+TP8qHdXX9txjteDQTsB7k9Fqkx/dEX3RM=,iv:5kAHobhOB4nWJai63P9gNdvbiwsbavSttJSxTDFpWW0=,tag:Ip4rJYPllxCxyFJzljcFsw==,type:str] - pgp: [] - unencrypted_suffix: _unencrypted - version: 3.7.3 From 32ebbff3b2f921fb197cadf5d9fc251ac93cff81 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 17:01:52 -0700 Subject: [PATCH 32/35] logs route tweaks --- pangeo_forge_orchestrator/routers/logs.py | 17 ++++++++++------- tests/routers/test_logs.py | 12 ++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py index 8e836ba0..5032feb0 100644 --- a/pangeo_forge_orchestrator/routers/logs.py +++ b/pangeo_forge_orchestrator/routers/logs.py @@ -10,7 +10,7 @@ from starlette import status from ..config import get_config -from ..dependencies import check_authentication_header, get_session as get_database_session +from ..dependencies import get_session as get_database_session from ..models import MODELS logs_router = APIRouter() @@ -71,8 +71,13 @@ def get_logs( recipe_run: SQLModel, db_session: Session, ): - cmd = f"python3 ../bakeries/dataflow/fetch_logs.py {job_name} --source={source}".split() - logs = subprocess.check_output(cmd) + cmd = [ + "python3", + "./pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py", + job_name, + f"--source={source}", + ] + logs = subprocess.check_output(cmd).decode("utf-8") # First security check: ensure known bakery secrets do not appear in logs statement = select(MODELS["bakery"].table).where( @@ -82,13 +87,13 @@ def get_logs( bakery_config = get_config().bakeries[bakery.name] bakery_secrets = secret_str_vals_from_basemodel(bakery_config) for secret in bakery_secrets: - if secret in str(logs): + if secret in logs: raise ValueError("Bakery secret detected in logs.") # Second security check: gitleaks with tempfile.TemporaryDirectory() as tmpdir: with open(f"{tmpdir}/c.json", mode="w") as f: - json.dump(logs, f) + json.dump(json.loads(logs), f) gitleaks_cmd = "gitleaks detect --no-git".split() gitleaks = subprocess.run(gitleaks_cmd, cwd=tmpdir) if not gitleaks.returncode == 0: @@ -112,7 +117,6 @@ def recipe_run_from_id( summary="Get job logs for a recipe_run, specified by database id.", tags=["recipe_run", "logs", "admin"], response_class=PlainTextResponse, - dependencies=[Depends(check_authentication_header)], ) async def raw_logs_from_recipe_run_id( id: int, @@ -152,7 +156,6 @@ def recipe_run_from_feedstock_spec_commit_and_recipe_id( summary="Get job logs for a recipe run, specified by feedstock_spec, commit, and recipe_id.", tags=["feedstock", "logs", "admin"], response_class=PlainTextResponse, - dependencies=[Depends(check_authentication_header)], ) async def raw_logs_from_feedstock_spec_commit_and_recipe_id( feedstock_spec: str, diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index 62c540ed..7164a710 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -113,7 +113,7 @@ async def get_logs_fixture( feedstock_spec="pangeo-forge/staged-recipes", commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", recipe_id="liveocean", - gcloud_logging_response="Some logging message from gcloud API.", + gcloud_logging_response=json.dumps(dict(message="logging goes here")), status="completed", conclusion="failure", ), @@ -131,7 +131,7 
@@ async def test_get_logs(mocker, get_logs_fixture, async_app_client): ) = get_logs_fixture def mock_fetch_logs_call(cmd: List[str]): - return gcloud_logging_response + return bytes(gcloud_logging_response, encoding="utf-8") mocker.patch.object(subprocess, "check_output", mock_fetch_logs_call) @@ -164,7 +164,7 @@ def mock_fetch_logs_call(cmd: List[str]): feedstock_spec="pangeo-forge/staged-recipes", commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", recipe_id="liveocean", - gcloud_logging_response="Some logging message from gcloud API.", + gcloud_logging_response=json.dumps(dict(message="logging goes here")), status="completed", conclusion="failure", ), @@ -179,7 +179,7 @@ async def test_get_logs_via_recipe_run_id( admin_headers, gcloud_logging_response, *_ = get_logs_fixture def mock_gcloud_logging_call(cmd: List[str]): - return gcloud_logging_response + return bytes(gcloud_logging_response, encoding="utf-8") mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) @@ -200,7 +200,7 @@ def mock_gcloud_logging_call(cmd: List[str]): feedstock_spec="pangeo-forge/staged-recipes", commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", recipe_id="liveocean", - gcloud_logging_response="Some logging message from gcloud API.", + gcloud_logging_response=json.dumps(dict(message="logging goes here")), status="completed", conclusion="failure", ), @@ -221,7 +221,7 @@ async def test_get_logs_human_readable_method( ) = get_logs_fixture def mock_gcloud_logging_call(cmd: List[str]): - return gcloud_logging_response + return bytes(gcloud_logging_response, encoding="utf-8") mocker.patch.object(subprocess, "check_output", mock_gcloud_logging_call) From 34fb7ee47246ba877f610e5cc9671ade6215ed32 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 17:14:40 -0700 Subject: [PATCH 33/35] move logs test params into shared list --- tests/routers/test_logs.py | 53 ++++++++++++++------------------------ 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index 7164a710..b63e7835 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -104,20 +104,27 @@ async def get_logs_fixture( clear_database() +gcloud_logging_responses = [ + json.dumps(dict(message="logging goes here")), +] +logs_fixture_indirect_params = [ + dict( + message='{"job_name": "abc"}', + feedstock_spec="pangeo-forge/staged-recipes", + commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", + recipe_id="liveocean", + gcloud_logging_response=response, + status="completed", + conclusion="failure", + ) + for response in gcloud_logging_responses +] + + @pytest.mark.asyncio @pytest.mark.parametrize( "get_logs_fixture", - [ - dict( - message='{"job_name": "abc"}', - feedstock_spec="pangeo-forge/staged-recipes", - commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", - recipe_id="liveocean", - gcloud_logging_response=json.dumps(dict(message="logging goes here")), - status="completed", - conclusion="failure", - ), - ], + logs_fixture_indirect_params, indirect=True, ) async def test_get_logs(mocker, get_logs_fixture, async_app_client): @@ -158,17 +165,7 @@ def mock_fetch_logs_call(cmd: List[str]): @pytest.mark.asyncio @pytest.mark.parametrize( "get_logs_fixture", - [ - dict( - message='{"job_name": "abc"}', - feedstock_spec="pangeo-forge/staged-recipes", - commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", - recipe_id="liveocean", - gcloud_logging_response=json.dumps(dict(message="logging goes here")), - 
status="completed", - conclusion="failure", - ), - ], + logs_fixture_indirect_params, indirect=True, ) async def test_get_logs_via_recipe_run_id( @@ -194,17 +191,7 @@ def mock_gcloud_logging_call(cmd: List[str]): @pytest.mark.asyncio @pytest.mark.parametrize( "get_logs_fixture", - [ - dict( - message='{"job_name": "abc"}', - feedstock_spec="pangeo-forge/staged-recipes", - commit="35d889f7c89e9f0d72353a0649ed1cd8da04826b", - recipe_id="liveocean", - gcloud_logging_response=json.dumps(dict(message="logging goes here")), - status="completed", - conclusion="failure", - ), - ], + logs_fixture_indirect_params, indirect=True, ) async def test_get_logs_human_readable_method( From f3a081a0971134f2402157b0abe65a2066757664 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 17:38:04 -0700 Subject: [PATCH 34/35] fix surface secrets func and test it --- pangeo_forge_orchestrator/routers/logs.py | 25 +++++++++++++++-------- tests/conftest.py | 5 ++++- tests/routers/test_logs.py | 21 +++++++++++++++++-- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/pangeo_forge_orchestrator/routers/logs.py b/pangeo_forge_orchestrator/routers/logs.py index 5032feb0..e5559942 100644 --- a/pangeo_forge_orchestrator/routers/logs.py +++ b/pangeo_forge_orchestrator/routers/logs.py @@ -47,20 +47,27 @@ def job_name_from_recipe_run(recipe_run: SQLModel) -> str: return job_name -def secret_str_vals_from_basemodel(obj: BaseModel) -> List[str]: +def secret_str_vals_from_basemodel(model: BaseModel) -> List[str]: """From a pydantic BaseModel, recursively surface all fields with type SecretStr.""" + # this list must be defined outside the recursive `surface_secrets` function, + # or else it will be re-assigned to empty when the function recurses + secret_str_vals = [] + def is_pydantic_model(obj): return isinstance(obj, BaseModel) - secret_str_vals = [] - if is_pydantic_model(obj): - for var in vars(obj): - if is_pydantic_model(getattr(obj, var)): - secret_str_vals_from_basemodel(getattr(obj, var)) - elif isinstance(getattr(obj, var), SecretStr): - secret_str_vals.append(json.loads(obj.json())[var]) - + def surface_secrets(model: BaseModel): + if is_pydantic_model(model): + for var in vars(model): + if is_pydantic_model(getattr(model, var)): + surface_secrets(getattr(model, var)) + elif isinstance(getattr(model, var), SecretStr): + secret_str_vals.append(json.loads(model.json())[var]) + else: + pass + + surface_secrets(model) return secret_str_vals diff --git a/tests/conftest.py b/tests/conftest.py index f779f234..e5bfab16 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -158,7 +158,10 @@ def mock_bakeries_config_paths(mock_bakeries_dir): ), TargetStorage=dict( fsspec_class="bar", - fsspec_args={}, + fsspec_args={ + "key": secrets.token_hex(10), + "secret": secrets.token_hex(10), + }, root_path="baz", public_url="https://public-endpoint.org/bucket-name/", ), diff --git a/tests/routers/test_logs.py b/tests/routers/test_logs.py index b63e7835..605a2d89 100644 --- a/tests/routers/test_logs.py +++ b/tests/routers/test_logs.py @@ -1,5 +1,6 @@ import json import subprocess +import uuid from typing import List import pytest @@ -7,13 +8,28 @@ from fastapi import HTTPException from sqlmodel import Session +from pangeo_forge_orchestrator.config import get_config from pangeo_forge_orchestrator.database import engine from pangeo_forge_orchestrator.models import MODELS -from pangeo_forge_orchestrator.routers.logs import get_logs, job_name_from_recipe_run +from 
pangeo_forge_orchestrator.routers.logs import ( + get_logs, + job_name_from_recipe_run, + secret_str_vals_from_basemodel, +) from ..conftest import clear_database +@pytest.mark.parametrize("bakery_name", ["pangeo-ldeo-nsf-earthcube"]) +def test_secret_str_vals_from_basemodel(bakery_name): + bakery_config = get_config().bakeries[bakery_name] + bakery_secrets = secret_str_vals_from_basemodel(bakery_config) + assert bakery_secrets == [ + json.loads(bakery_config.TargetStorage.fsspec_args.json())["key"], + json.loads(bakery_config.TargetStorage.fsspec_args.json())["secret"], + ] + + @pytest.mark.parametrize( "message, expected_error", [ @@ -105,7 +121,8 @@ async def get_logs_fixture( gcloud_logging_responses = [ - json.dumps(dict(message="logging goes here")), + json.dumps(dict(message="[worker] here's some normal logging with no secrets")), + json.dumps(dict(message=f"[worker] a secret token={uuid.uuid4().hex}")), ] logs_fixture_indirect_params = [ dict( From f1bd920e5d503822a94370cae9e658d8db1c5016 Mon Sep 17 00:00:00 2001 From: Charles Stern <62192187+cisaacstern@users.noreply.github.com> Date: Tue, 4 Oct 2022 17:44:15 -0700 Subject: [PATCH 35/35] add gitleaks to Dockerfile --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index c6db30af..ca2575d7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,8 +8,11 @@ RUN tar xzf /go/src/app/v${SOPS_VERSION}.tar.gz -C /go/src/app/ WORKDIR /go/src/app/sops-${SOPS_VERSION} RUN make install +FROM zricethezav/gitleaks:latest + FROM ubuntu:22.04 COPY --from=0 /go/bin/sops /usr/local/bin/sops +COPY --from=1 /usr/bin/gitleaks /usr/local/bin/gitleaks # we need python3.9 because apache beam is not supported on 3.10 # is the best way to get 3.9 on ubuntu? https://askubuntu.com/a/682875 RUN apt-get update \
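
As a usage note for the `fetch_logs.py` executable added in PATCH 25: based on the argparse
options defined in the script, it can be invoked directly with a Dataflow job name. The job
name below is a placeholder, and `--source` may be repeated, since it is declared with
`action="append"`:

```console
$ python3 pangeo_forge_orchestrator/bakeries/dataflow/fetch_logs.py <dataflow-job-name> \
    --source=worker --source=job-message
```

As of PATCH 26, the script prints the collected log records to stdout as a JSON array.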