From 15a056b01520748980abe4839407aafa0fa8d2b1 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Thu, 26 Jan 2023 15:51:05 +0100 Subject: [PATCH 1/6] add adoption metrics to the qa report --- .../ci_connector_ops/qa_engine/enrichments.py | 10 +++++- .../ci_connector_ops/qa_engine/inputs.py | 4 --- .../ci_connector_ops/qa_engine/main.py | 17 ++++++---- .../ci_connector_ops/qa_engine/models.py | 3 ++ .../ci_connector_ops/qa_engine/validations.py | 11 +++---- tools/ci_connector_ops/tests/conftest.py | 31 +++++++++++++++++++ .../tests/test_qa_engine/test_enrichments.py | 23 +++++++++----- .../tests/test_qa_engine/test_inputs.py | 4 +-- .../tests/test_qa_engine/test_main.py | 20 +++++++++--- .../tests/test_qa_engine/test_validations.py | 12 ++++--- 10 files changed, 99 insertions(+), 36 deletions(-) create mode 100644 tools/ci_connector_ops/tests/conftest.py diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py index e275dcb51d33b..96a2b96b0ce58 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py @@ -5,7 +5,10 @@ import pandas as pd -def get_enriched_catalog(oss_catalog: pd.DataFrame, cloud_catalog: pd.DataFrame) -> pd.DataFrame: +def get_enriched_catalog( + oss_catalog: pd.DataFrame, + cloud_catalog: pd.DataFrame, + adoption_metrics_per_connector_version: pd.DataFrame) -> pd.DataFrame: """Merge OSS and Cloud catalog in a single dataframe on their definition id. Transformations: - Rename columns to snake case. @@ -15,9 +18,11 @@ def get_enriched_catalog(oss_catalog: pd.DataFrame, cloud_catalog: pd.DataFrame) Enrichments: - is_on_cloud: determined by the merge operation results. - connector_technical_name: built from the docker repository field. airbyte/source-pokeapi -> source-pokeapi. + - Adoptions metrics: add the columns from the adoption_metrics_per_connector_version dataframe. 
Args: oss_catalog (pd.DataFrame): The open source catalog dataframe. cloud_catalog (pd.DataFrame): The cloud catalog dataframe. + adoption_metrics_per_connector_version (pd.DataFrame): The crowd-sourced adoption metrics. Returns: pd.DataFrame: The enriched catalog. @@ -33,10 +38,13 @@ def get_enriched_catalog(oss_catalog: pd.DataFrame, cloud_catalog: pd.DataFrame) enriched_catalog.columns = enriched_catalog.columns.str.replace( "(?<=[a-z])(?=[A-Z])", "_", regex=True ).str.lower() # column names to snake case + enriched_catalog = enriched_catalog[[c for c in enriched_catalog.columns if "_del" not in c]] enriched_catalog["is_on_cloud"] = enriched_catalog["_merge"] == "both" enriched_catalog = enriched_catalog.drop(columns="_merge") enriched_catalog["connector_name"] = enriched_catalog["name"] enriched_catalog["connector_technical_name"] = enriched_catalog["docker_repository"].str.replace("airbyte/", "") enriched_catalog["connector_version"] = enriched_catalog["docker_image_tag"] enriched_catalog["release_stage"] = enriched_catalog["release_stage"].fillna("unknown") + enriched_catalog = enriched_catalog.merge(adoption_metrics_per_connector_version, how="left", on=["connector_definition_id", "connector_version"]) + enriched_catalog[adoption_metrics_per_connector_version.columns] = enriched_catalog[adoption_metrics_per_connector_version.columns].fillna(0) return enriched_catalog diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py index a3d918496fe98..f42b4541e1691 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py @@ -10,7 +10,6 @@ import requests import pandas as pd -from .constants import CLOUD_CATALOG_URL, OSS_CATALOG_URL def fetch_remote_catalog(catalog_url: str) -> pd.DataFrame: """Fetch a combined remote catalog and return a single DataFrame @@ -50,6 +49,3 @@ def 
fetch_adoption_metrics_per_connector_version() -> pd.DataFrame: "total_syncs_count", "sync_success_rate", ]] - -CLOUD_CATALOG = fetch_remote_catalog(CLOUD_CATALOG_URL) -OSS_CATALOG = fetch_remote_catalog(OSS_CATALOG_URL) diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py index a1e623cddb50c..b155b2d828b15 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py @@ -3,13 +3,18 @@ # -from .constants import GCS_QA_REPORT_PATH -from .enrichments import get_enriched_catalog -from .inputs import CLOUD_CATALOG, OSS_CATALOG -from .validations import get_qa_report +from .constants import CLOUD_CATALOG_URL, GCS_QA_REPORT_PATH, OSS_CATALOG_URL +from . import enrichments, inputs, validations def main(): - enriched_catalog = get_enriched_catalog(OSS_CATALOG, CLOUD_CATALOG) - qa_report = get_qa_report(enriched_catalog) + oss_catalog = inputs.fetch_remote_catalog(OSS_CATALOG_URL) + cloud_catalog = inputs.fetch_remote_catalog(CLOUD_CATALOG_URL) + adoption_metrics_per_connector_version = inputs.fetch_adoption_metrics_per_connector_version() + enriched_catalog = enrichments.get_enriched_catalog( + oss_catalog, + cloud_catalog, + adoption_metrics_per_connector_version + ) + qa_report = validations.get_qa_report(enriched_catalog, len(oss_catalog)) qa_report.to_json(GCS_QA_REPORT_PATH, orient="records") diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py index 7fef17500db3c..fed83174d6b25 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py @@ -32,6 +32,9 @@ class ConnectorQAReport(BaseModel): number_of_connections: int number_of_users: int sync_success_rate: float + total_syncs_count: int + failed_syncs_count: int + succeeded_syncs_count: int class 
QAReport(BaseModel): connectors_qa_report: List[ConnectorQAReport] diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py index 8a04e8b91bb8a..c559496ac27d1 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py @@ -7,7 +7,6 @@ import requests from .constants import INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS -from .inputs import OSS_CATALOG from .models import ConnectorQAReport, QAReport class QAReportGenerationError(Exception): @@ -20,7 +19,7 @@ def url_is_reachable(url: str) -> bool: def is_appropriate_for_cloud_use(definition_id: str) -> bool: return definition_id not in INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS -def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: +def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd.DataFrame: """Perform validation steps on top of the enriched catalog. Adds the following columns: - documentation_is_available: @@ -37,6 +36,7 @@ def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: Get the sync success rate of the connections with this connector version from our datawarehouse. Args: enriched_catalog (pd.DataFrame): The enriched catalog. + oss_catalog_length (int): The length of the OSS catalog, used as a sanity check. Returns: pd.DataFrame: The final QA report. 
@@ -47,14 +47,11 @@ def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: # TODO YET TO IMPLEMENT VALIDATIONS qa_report["latest_build_is_successful"] = False # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21720 - qa_report["number_of_connections"] = 0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 - qa_report["number_of_users"] = 0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 - qa_report["sync_success_rate"] = .0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 # Only select dataframe columns defined in the ConnectorQAReport model. qa_report= qa_report[[field.name for field in ConnectorQAReport.__fields__.values()]] # Validate the report structure with pydantic QAReport model. QAReport(connectors_qa_report=qa_report.to_dict(orient="records")) - if len(qa_report) != len(OSS_CATALOG): - raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.") + if len(qa_report) != oss_catalog_length: + raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.") return qa_report diff --git a/tools/ci_connector_ops/tests/conftest.py b/tools/ci_connector_ops/tests/conftest.py new file mode 100644 index 0000000000000..f2cd7a66cd95e --- /dev/null +++ b/tools/ci_connector_ops/tests/conftest.py @@ -0,0 +1,31 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import pandas as pd +import pytest + +from ci_connector_ops.qa_engine.constants import OSS_CATALOG_URL, CLOUD_CATALOG_URL +from ci_connector_ops.qa_engine.inputs import fetch_remote_catalog + +@pytest.fixture(scope="module") +def oss_catalog(): + return fetch_remote_catalog(OSS_CATALOG_URL) + +@pytest.fixture(scope="module") +def cloud_catalog(): + return fetch_remote_catalog(CLOUD_CATALOG_URL) + +@pytest.fixture(scope="module") +def adoption_metrics_per_connector_version(): + return pd.DataFrame([{ + "connector_definition_id": "dfd88b22-b603-4c3d-aad7-3701784586b1", + "connector_version": "2.0.0", + "number_of_connections": 0, + "number_of_users": 0, + "succeeded_syncs_count": 0, + "failed_syncs_count": 0, + "total_syncs_count": 0, + "sync_success_rate": 0.0, + }]) diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_enrichments.py b/tools/ci_connector_ops/tests/test_qa_engine/test_enrichments.py index c108c2d612c20..c8013070649bf 100644 --- a/tools/ci_connector_ops/tests/test_qa_engine/test_enrichments.py +++ b/tools/ci_connector_ops/tests/test_qa_engine/test_enrichments.py @@ -8,30 +8,37 @@ import pandas as pd import pytest -from ci_connector_ops.qa_engine import inputs, enrichments +from ci_connector_ops.qa_engine import enrichments + @pytest.fixture -def enriched_catalog() -> pd.DataFrame: - return enrichments.get_enriched_catalog(inputs.OSS_CATALOG, inputs.CLOUD_CATALOG) +def enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version) -> pd.DataFrame: + return enrichments.get_enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version) @pytest.fixture def enriched_catalog_columns(enriched_catalog: pd.DataFrame) -> set: return set(enriched_catalog.columns) -def test_merge_performed_correctly(enriched_catalog): - assert len(enriched_catalog) == len(inputs.OSS_CATALOG) +def test_merge_performed_correctly(enriched_catalog, oss_catalog): + assert len(enriched_catalog) == len(oss_catalog) def 
test_new_columns_are_added(enriched_catalog_columns): expected_new_columns = { "is_on_cloud", "connector_name", "connector_technical_name", - "connector_version" + "connector_version", + "number_of_connections", + "number_of_users", + "succeeded_syncs_count", + "failed_syncs_count", + "total_syncs_count", + "sync_success_rate", } assert expected_new_columns.issubset(enriched_catalog_columns) -def test_no_column_are_removed_and_lowercased(enriched_catalog_columns): - for column in inputs.OSS_CATALOG: +def test_no_column_are_removed_and_lowercased(enriched_catalog_columns, oss_catalog): + for column in oss_catalog: assert re.sub(r"(? pd.DataFrame: def test_main(tmp_path, mocker, dummy_report): output_path = tmp_path / "output.json" mocker.patch.object(main, "GCS_QA_REPORT_PATH", output_path) - mocker.patch.object(main, "get_enriched_catalog") - mocker.patch.object(main, "get_qa_report", mocker.Mock(return_value=dummy_report)) + mocker.patch.object(main, "enrichments") + mocker.patch.object( + main.inputs, + "fetch_remote_catalog", + mocker.Mock(side_effect=["oss", "cloud"])) + mocker.patch.object(main.inputs, "fetch_adoption_metrics_per_connector_version") + mocker.patch.object(main.validations, "get_qa_report", mocker.Mock(return_value=dummy_report)) main.main() - main.get_enriched_catalog.assert_called_with(main.OSS_CATALOG, main.CLOUD_CATALOG) - main.get_qa_report.assert_called_with(main.get_enriched_catalog.return_value) + main.enrichments.get_enriched_catalog.assert_called_with( + "oss", + "cloud", + main.inputs.fetch_adoption_metrics_per_connector_version.return_value + ) + main.validations.get_qa_report.assert_called_with( + main.enrichments.get_enriched_catalog.return_value, + 3 # len of the "oss" string... 
+ ) assert pd.read_json(output_path).to_dict() == dummy_report.to_dict() diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py b/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py index c20dc12f4b8a3..f281fe076cb48 100644 --- a/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py +++ b/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py @@ -9,13 +9,17 @@ from ci_connector_ops.qa_engine import inputs, enrichments, models, validations @pytest.fixture -def enriched_catalog() -> pd.DataFrame: - return enrichments.get_enriched_catalog(inputs.OSS_CATALOG, inputs.CLOUD_CATALOG) +def enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version) -> pd.DataFrame: + return enrichments.get_enriched_catalog( + oss_catalog, + cloud_catalog, + adoption_metrics_per_connector_version + ) @pytest.fixture def qa_report(enriched_catalog, mocker) -> pd.DataFrame: mocker.patch.object(validations, "url_is_reachable", mocker.Mock(return_value=True)) - return validations.get_qa_report(enriched_catalog) + return validations.get_qa_report(enriched_catalog, len(enriched_catalog)) @pytest.fixture def qa_report_columns(qa_report: pd.DataFrame) -> set: @@ -31,4 +35,4 @@ def test_not_null_values_after_validation(qa_report: pd.DataFrame): def test_report_generation_error(enriched_catalog, mocker): mocker.patch.object(validations, "url_is_reachable", mocker.Mock(return_value=True)) with pytest.raises(validations.QAReportGenerationError): - return validations.get_qa_report(enriched_catalog.sample(10)) + return validations.get_qa_report(enriched_catalog.sample(1), 2) From 95dd606f3215236d97a8efbc2b37ce6a4466a8b0 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Fri, 27 Jan 2023 10:48:55 +0100 Subject: [PATCH 2/6] make adoption fields not public --- .../ci_connector_ops/qa_engine/main.py | 4 +- .../ci_connector_ops/qa_engine/models.py | 37 +++++++------ .../ci_connector_ops/qa_engine/outputs.py | 15 ++++++ 
tools/ci_connector_ops/setup.py | 2 +- tools/ci_connector_ops/tests/conftest.py | 23 ++++++++ .../tests/test_qa_engine/test_main.py | 52 +++++++++---------- .../tests/test_qa_engine/test_outputs.py | 24 +++++++++ 7 files changed, 109 insertions(+), 48 deletions(-) create mode 100644 tools/ci_connector_ops/ci_connector_ops/qa_engine/outputs.py create mode 100644 tools/ci_connector_ops/tests/test_qa_engine/test_outputs.py diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py index b155b2d828b15..2eeb77033a1c1 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py @@ -4,7 +4,7 @@ from .constants import CLOUD_CATALOG_URL, GCS_QA_REPORT_PATH, OSS_CATALOG_URL -from . import enrichments, inputs, validations +from . import enrichments, inputs, validations, outputs def main(): @@ -17,4 +17,4 @@ def main(): adoption_metrics_per_connector_version ) qa_report = validations.get_qa_report(enriched_catalog, len(oss_catalog)) - qa_report.to_json(GCS_QA_REPORT_PATH, orient="records") + outputs.persist_qa_report(qa_report, GCS_QA_REPORT_PATH, public_fields_only=True) diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py index fed83174d6b25..b8c4c6daf66a1 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py @@ -6,7 +6,7 @@ from enum import Enum from typing import List -from pydantic import BaseModel +from pydantic import BaseModel, Field class ConnectorTypeEnum(str, Enum): source = "source" @@ -18,23 +18,26 @@ class ReleaseStageEnum(str, Enum): beta = "beta" generally_available = "generally_available" +PUBLIC_FIELD = Field(..., is_public=True) +PRIVATE_FIELD = Field(..., is_public=False) + class ConnectorQAReport(BaseModel): - connector_type: ConnectorTypeEnum - 
connector_name: str - connector_technical_name: str - connector_definition_id: str - connector_version: str - release_stage: ReleaseStageEnum - is_on_cloud: bool - is_appropriate_for_cloud_use: bool - latest_build_is_successful: bool - documentation_is_available: bool - number_of_connections: int - number_of_users: int - sync_success_rate: float - total_syncs_count: int - failed_syncs_count: int - succeeded_syncs_count: int + connector_type: ConnectorTypeEnum = PUBLIC_FIELD + connector_name: str = PUBLIC_FIELD + connector_technical_name: str = PUBLIC_FIELD + connector_definition_id: str = PUBLIC_FIELD + connector_version: str = PUBLIC_FIELD + release_stage: ReleaseStageEnum = PUBLIC_FIELD + is_on_cloud: bool = PUBLIC_FIELD + is_appropriate_for_cloud_use: bool = PUBLIC_FIELD + latest_build_is_successful: bool = PUBLIC_FIELD + documentation_is_available: bool = PUBLIC_FIELD + number_of_connections: int = PRIVATE_FIELD + number_of_users: int = PRIVATE_FIELD + sync_success_rate: float = PRIVATE_FIELD + total_syncs_count: int = PRIVATE_FIELD + failed_syncs_count: int = PRIVATE_FIELD + succeeded_syncs_count: int = PRIVATE_FIELD class QAReport(BaseModel): connectors_qa_report: List[ConnectorQAReport] diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/outputs.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/outputs.py new file mode 100644 index 0000000000000..88253303a30d1 --- /dev/null +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/outputs.py @@ -0,0 +1,15 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import pandas as pd + +from .models import ConnectorQAReport + +def persist_qa_report(qa_report: pd.DataFrame, path: str, public_fields_only: bool =True): + final_fields = [ + field.name for field in ConnectorQAReport.__fields__.values() + if field.field_info.extra["is_public"] or not public_fields_only + ] + qa_report[final_fields].to_json(path, orient="records") diff --git a/tools/ci_connector_ops/setup.py b/tools/ci_connector_ops/setup.py index 1b85d559a3bec..f8dced1a5444c 100644 --- a/tools/ci_connector_ops/setup.py +++ b/tools/ci_connector_ops/setup.py @@ -23,7 +23,7 @@ setup( - version="0.1.6", + version="0.1.7", name="ci_connector_ops", description="Packaged maintained by the connector operations team to perform CI for connectors", author="Airbyte", diff --git a/tools/ci_connector_ops/tests/conftest.py b/tools/ci_connector_ops/tests/conftest.py index f2cd7a66cd95e..3624905dbac30 100644 --- a/tools/ci_connector_ops/tests/conftest.py +++ b/tools/ci_connector_ops/tests/conftest.py @@ -29,3 +29,26 @@ def adoption_metrics_per_connector_version(): "total_syncs_count": 0, "sync_success_rate": 0.0, }]) + +@pytest.fixture +def dummy_qa_report() -> pd.DataFrame: + return pd.DataFrame([ + { + "connector_type": "source", + "connector_name": "test", + "connector_technical_name": "source-test", + "connector_definition_id": "foobar", + "connector_version": "0.0.0", + "release_stage": "alpha", + "is_on_cloud": False, + "is_appropriate_for_cloud_use": True, + "latest_build_is_successful": True, + "documentation_is_available": False, + "number_of_connections": 0, + "number_of_users": 0, + "sync_success_rate": .99, + "total_syncs_count": 0, + "failed_syncs_count": 0, + "succeeded_syncs_count": 0 + } + ]) diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_main.py b/tools/ci_connector_ops/tests/test_qa_engine/test_main.py index 18a63dfac61a7..7f883faa087cd 100644 --- a/tools/ci_connector_ops/tests/test_qa_engine/test_main.py +++ 
b/tools/ci_connector_ops/tests/test_qa_engine/test_main.py @@ -3,46 +3,42 @@ # -import pandas as pd -import pytest - from ci_connector_ops.qa_engine import main -@pytest.fixture -def dummy_report() -> pd.DataFrame: - return pd.DataFrame([ - { - "connector_type": "source", - "connector_name": "test", - "docker_image_tag": "0.0.0", - "release_stage": "alpha", - "is_on_cloud": False, - "latest_build_is_successful": False, - "documentation_is_available": False, - "number_of_connections": 0, - "number_of_users": 0, - "sync_success_rate": .99 - } - ]) -def test_main(tmp_path, mocker, dummy_report): - output_path = tmp_path / "output.json" - mocker.patch.object(main, "GCS_QA_REPORT_PATH", output_path) +def test_main(mocker, dummy_qa_report): + mock_oss_catalog = mocker.Mock(__len__=mocker.Mock(return_value=42)) + mock_cloud_catalog = mocker.Mock() + mocker.patch.object(main, "enrichments") + mocker.patch.object(main, "outputs") mocker.patch.object( main.inputs, "fetch_remote_catalog", - mocker.Mock(side_effect=["oss", "cloud"])) + mocker.Mock(side_effect=[mock_oss_catalog, mock_cloud_catalog])) mocker.patch.object(main.inputs, "fetch_adoption_metrics_per_connector_version") - mocker.patch.object(main.validations, "get_qa_report", mocker.Mock(return_value=dummy_report)) + mocker.patch.object(main.validations, "get_qa_report", mocker.Mock(return_value=dummy_qa_report)) + main.main() + + assert main.inputs.fetch_remote_catalog.call_count == 2 + main.inputs.fetch_remote_catalog.assert_has_calls( + [ + mocker.call(main.OSS_CATALOG_URL), + mocker.call(main.CLOUD_CATALOG_URL) + ] + ) main.enrichments.get_enriched_catalog.assert_called_with( - "oss", - "cloud", + mock_oss_catalog, + mock_cloud_catalog, main.inputs.fetch_adoption_metrics_per_connector_version.return_value ) main.validations.get_qa_report.assert_called_with( main.enrichments.get_enriched_catalog.return_value, - 3 # len of the "oss" string... 
+ len(mock_oss_catalog) + ) + main.outputs.persist_qa_report.assert_called_once_with( + dummy_qa_report, + main.GCS_QA_REPORT_PATH, + public_fields_only=True ) - assert pd.read_json(output_path).to_dict() == dummy_report.to_dict() diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_outputs.py b/tools/ci_connector_ops/tests/test_qa_engine/test_outputs.py new file mode 100644 index 0000000000000..7507b6b5876cd --- /dev/null +++ b/tools/ci_connector_ops/tests/test_qa_engine/test_outputs.py @@ -0,0 +1,24 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pandas as pd +import pytest + +from ci_connector_ops.qa_engine import outputs + +@pytest.mark.parametrize("public_fields_only", [True, False]) +def test_persist_qa_report_public_fields_only(tmp_path, dummy_qa_report, public_fields_only): + output_path = tmp_path / "qa_report.json" + outputs.persist_qa_report(dummy_qa_report, output_path, public_fields_only=public_fields_only) + qa_report_from_disk = pd.read_json(output_path) + private_fields = { + field.name for field in outputs.ConnectorQAReport.__fields__.values() + if not field.field_info.extra["is_public"] + } + available_fields = set(qa_report_from_disk.columns) + if public_fields_only: + assert not private_fields.issubset(available_fields) + else: + assert private_fields.issubset(available_fields) From 6d4feb31797ab95b830eb23025f3b53ccc6a9d97 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Fri, 27 Jan 2023 11:05:12 +0100 Subject: [PATCH 3/6] disable GCS persistence --- tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py | 3 +-- tools/ci_connector_ops/tests/test_qa_engine/test_main.py | 6 +----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py index 2eeb77033a1c1..a94dad5109094 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py +++ 
b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py @@ -16,5 +16,4 @@ def main(): cloud_catalog, adoption_metrics_per_connector_version ) - qa_report = validations.get_qa_report(enriched_catalog, len(oss_catalog)) - outputs.persist_qa_report(qa_report, GCS_QA_REPORT_PATH, public_fields_only=True) + validations.get_qa_report(enriched_catalog, len(oss_catalog)) diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_main.py b/tools/ci_connector_ops/tests/test_qa_engine/test_main.py index 7f883faa087cd..23caef04c447e 100644 --- a/tools/ci_connector_ops/tests/test_qa_engine/test_main.py +++ b/tools/ci_connector_ops/tests/test_qa_engine/test_main.py @@ -37,8 +37,4 @@ def test_main(mocker, dummy_qa_report): main.enrichments.get_enriched_catalog.return_value, len(mock_oss_catalog) ) - main.outputs.persist_qa_report.assert_called_once_with( - dummy_qa_report, - main.GCS_QA_REPORT_PATH, - public_fields_only=True - ) + From dd33060fa09101f63b29617792d339868b87275b Mon Sep 17 00:00:00 2001 From: alafanechere Date: Fri, 27 Jan 2023 16:46:56 +0100 Subject: [PATCH 4/6] rename _del to _cloud --- .../ci_connector_ops/qa_engine/enrichments.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py index 96a2b96b0ce58..a64008ae28fb1 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/enrichments.py @@ -33,12 +33,13 @@ def get_enriched_catalog( how="left", on="connector_definition_id", indicator=True, - suffixes=("", "_del"), + suffixes=("", "_cloud"), ) + enriched_catalog.columns = enriched_catalog.columns.str.replace( "(?<=[a-z])(?=[A-Z])", "_", regex=True ).str.lower() # column names to snake case - enriched_catalog = enriched_catalog[[c for c in enriched_catalog.columns if "_del" not in c]] + enriched_catalog = enriched_catalog[[c for c in 
enriched_catalog.columns if "_cloud" not in c]] enriched_catalog["is_on_cloud"] = enriched_catalog["_merge"] == "both" enriched_catalog = enriched_catalog.drop(columns="_merge") enriched_catalog["connector_name"] = enriched_catalog["name"] From a687435e9e45825bba4258c1dca2e60d025ac646 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Fri, 27 Jan 2023 17:02:15 +0100 Subject: [PATCH 5/6] fix conflicts --- .../ci_connector_ops/qa_engine/models.py | 3 + .../ci_connector_ops/qa_engine/validations.py | 24 ++++++ tools/ci_connector_ops/tests/conftest.py | 5 +- .../tests/test_qa_engine/test_validations.py | 74 ++++++++++++++++++- 4 files changed, 104 insertions(+), 2 deletions(-) diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py index b8c4c6daf66a1..eaf60b504dea1 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py @@ -3,6 +3,7 @@ # +from datetime import datetime from enum import Enum from typing import List @@ -38,6 +39,8 @@ class ConnectorQAReport(BaseModel): total_syncs_count: int = PRIVATE_FIELD failed_syncs_count: int = PRIVATE_FIELD succeeded_syncs_count: int = PRIVATE_FIELD + is_eligible_for_promotion_to_cloud: bool = PUBLIC_FIELD + report_generation_datetime: datetime = PUBLIC_FIELD class QAReport(BaseModel): connectors_qa_report: List[ConnectorQAReport] diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py index c559496ac27d1..81b32523443b5 100644 --- a/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py +++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py @@ -3,12 +3,21 @@ # +from datetime import datetime +from typing import Iterable + import pandas as pd import requests from .constants import INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS from .models import ConnectorQAReport, 
QAReport +TRUTHY_COLUMNS_TO_BE_ELIGIBLE = [ + "documentation_is_available", + "is_appropriate_for_cloud_use", + "latest_build_is_successful" +] + class QAReportGenerationError(Exception): pass @@ -19,6 +28,14 @@ def url_is_reachable(url: str) -> bool: def is_appropriate_for_cloud_use(definition_id: str) -> bool: return definition_id not in INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS +def is_eligible_for_promotion_to_cloud(connector_qa_data: pd.Series) -> bool: + if connector_qa_data["is_on_cloud"]: + return False + return all([ + connector_qa_data[col] + for col in TRUTHY_COLUMNS_TO_BE_ELIGIBLE + ]) + def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd.DataFrame: """Perform validation steps on top of the enriched catalog. Adds the following columns: @@ -48,6 +65,9 @@ def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd # TODO YET TO IMPLEMENT VALIDATIONS qa_report["latest_build_is_successful"] = False # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21720 + qa_report["is_eligible_for_promotion_to_cloud"] = qa_report.apply(is_eligible_for_promotion_to_cloud, axis="columns") + qa_report["report_generation_datetime"] = datetime.utcnow() + # Only select dataframe columns defined in the ConnectorQAReport model. qa_report= qa_report[[field.name for field in ConnectorQAReport.__fields__.values()]] # Validate the report structure with pydantic QAReport model. 
@@ -55,3 +75,7 @@ def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd if len(qa_report) != oss_catalog_length: raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.") return qa_report + +def get_connectors_eligible_for_cloud(qa_report: pd.DataFrame) -> Iterable[ConnectorQAReport]: + for _, row in qa_report[qa_report["is_eligible_for_promotion_to_cloud"]].iterrows(): + yield ConnectorQAReport(**row) diff --git a/tools/ci_connector_ops/tests/conftest.py b/tools/ci_connector_ops/tests/conftest.py index 3624905dbac30..60380444fd376 100644 --- a/tools/ci_connector_ops/tests/conftest.py +++ b/tools/ci_connector_ops/tests/conftest.py @@ -3,6 +3,7 @@ # +from datetime import datetime import pandas as pd import pytest @@ -49,6 +50,8 @@ def dummy_qa_report() -> pd.DataFrame: "sync_success_rate": .99, "total_syncs_count": 0, "failed_syncs_count": 0, - "succeeded_syncs_count": 0 + "succeeded_syncs_count": 0, + "is_eligible_for_promotion_to_cloud": True, + "report_generation_datetime": datetime.utcnow() } ]) diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py b/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py index f281fe076cb48..3535700bc0aeb 100644 --- a/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py +++ b/tools/ci_connector_ops/tests/test_qa_engine/test_validations.py @@ -6,7 +6,7 @@ import pandas as pd import pytest -from ci_connector_ops.qa_engine import inputs, enrichments, models, validations +from ci_connector_ops.qa_engine import enrichments, inputs, models, validations @pytest.fixture def enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version) -> pd.DataFrame: @@ -36,3 +36,75 @@ def test_report_generation_error(enriched_catalog, mocker): mocker.patch.object(validations, "url_is_reachable", mocker.Mock(return_value=True)) with pytest.raises(validations.QAReportGenerationError): return 
validations.get_qa_report(enriched_catalog.sample(1), 2) + +@pytest.mark.parametrize( + "connector_qa_data, expected_to_be_eligible", + [ + ( + pd.Series({ + "is_on_cloud": False, + "documentation_is_available": True, + "is_appropriate_for_cloud_use": True, + "latest_build_is_successful": True + }), + True + ), + ( + pd.Series({ + "is_on_cloud": True, + "documentation_is_available": True, + "is_appropriate_for_cloud_use": True, + "latest_build_is_successful": True + }), + False + ), + ( + pd.Series({ + "is_on_cloud": True, + "documentation_is_available": False, + "is_appropriate_for_cloud_use": False, + "latest_build_is_successful": False + }), + False + ), + ( + pd.Series({ + "is_on_cloud": False, + "documentation_is_available": False, + "is_appropriate_for_cloud_use": True, + "latest_build_is_successful": True + }), + False + ), + ( + pd.Series({ + "is_on_cloud": False, + "documentation_is_available": True, + "is_appropriate_for_cloud_use": False, + "latest_build_is_successful": True + }), + False + ), + ( + pd.Series({ + "is_on_cloud": False, + "documentation_is_available": True, + "is_appropriate_for_cloud_use": True, + "latest_build_is_successful": False + }), + False + ) + ] +) +def test_is_eligible_for_promotion_to_cloud(connector_qa_data: pd.Series, expected_to_be_eligible: bool): + assert validations.is_eligible_for_promotion_to_cloud(connector_qa_data) == expected_to_be_eligible + +def test_get_connectors_eligible_for_cloud(qa_report: pd.DataFrame): + qa_report["is_eligible_for_promotion_to_cloud"] = True + connectors_eligible_for_cloud = list(validations.get_connectors_eligible_for_cloud(qa_report)) + assert len(qa_report) == len(connectors_eligible_for_cloud) + assert all([c.is_eligible_for_promotion_to_cloud for c in connectors_eligible_for_cloud]) + + qa_report["is_eligible_for_promotion_to_cloud"] = False + connectors_eligible_for_cloud = list(validations.get_connectors_eligible_for_cloud(qa_report)) + assert len(connectors_eligible_for_cloud) == 0 
From 6a6396d71d07c61f62426924f530bebf19fc6b28 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Fri, 27 Jan 2023 17:08:03 +0100 Subject: [PATCH 6/6] bump --- tools/ci_connector_ops/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_connector_ops/setup.py b/tools/ci_connector_ops/setup.py index f8dced1a5444c..50cc3f0fe40b3 100644 --- a/tools/ci_connector_ops/setup.py +++ b/tools/ci_connector_ops/setup.py @@ -23,7 +23,7 @@ setup( - version="0.1.7", + version="0.1.8", name="ci_connector_ops", description="Packaged maintained by the connector operations team to perform CI for connectors", author="Airbyte",