-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
QA engine: add adoption metrics to the QA report #21917
Changes from all commits
15a056b
943a90c
ad0136a
95dd606
6d4feb3
76e3cbf
dd33060
a687435
6a6396d
3b8c2c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,6 @@ | |
import requests | ||
import pandas as pd | ||
|
||
from .constants import CLOUD_CATALOG_URL, OSS_CATALOG_URL | ||
|
||
def fetch_remote_catalog(catalog_url: str) -> pd.DataFrame: | ||
"""Fetch a combined remote catalog and return a single DataFrame | ||
|
@@ -50,6 +49,3 @@ def fetch_adoption_metrics_per_connector_version() -> pd.DataFrame: | |
"total_syncs_count", | ||
"sync_success_rate", | ||
]] | ||
|
||
CLOUD_CATALOG = fetch_remote_catalog(CLOUD_CATALOG_URL) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed these constants from the module because they cause unnecessary network call on module import. I preferred to use dependency injections and test fixtures to expose these (direct calls to the |
||
OSS_CATALOG = fetch_remote_catalog(OSS_CATALOG_URL) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
from enum import Enum | ||
from typing import List | ||
|
||
from pydantic import BaseModel | ||
from pydantic import BaseModel, Field | ||
|
||
class ConnectorTypeEnum(str, Enum): | ||
source = "source" | ||
|
@@ -19,22 +19,28 @@ class ReleaseStageEnum(str, Enum): | |
beta = "beta" | ||
generally_available = "generally_available" | ||
|
||
PUBLIC_FIELD = Field(..., is_public=True) | ||
PRIVATE_FIELD = Field(..., is_public=False) | ||
Comment on lines
+22
to
+23
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
|
||
class ConnectorQAReport(BaseModel): | ||
connector_type: ConnectorTypeEnum | ||
connector_name: str | ||
connector_technical_name: str | ||
connector_definition_id: str | ||
connector_version: str | ||
release_stage: ReleaseStageEnum | ||
is_on_cloud: bool | ||
is_appropriate_for_cloud_use: bool | ||
latest_build_is_successful: bool | ||
documentation_is_available: bool | ||
number_of_connections: int | ||
number_of_users: int | ||
sync_success_rate: float | ||
is_eligible_for_promotion_to_cloud: bool | ||
report_generation_datetime: datetime | ||
connector_type: ConnectorTypeEnum = PUBLIC_FIELD | ||
connector_name: str = PUBLIC_FIELD | ||
connector_technical_name: str = PUBLIC_FIELD | ||
connector_definition_id: str = PUBLIC_FIELD | ||
connector_version: str = PUBLIC_FIELD | ||
release_stage: ReleaseStageEnum = PUBLIC_FIELD | ||
is_on_cloud: bool = PUBLIC_FIELD | ||
is_appropriate_for_cloud_use: bool = PUBLIC_FIELD | ||
latest_build_is_successful: bool = PUBLIC_FIELD | ||
documentation_is_available: bool = PUBLIC_FIELD | ||
number_of_connections: int = PRIVATE_FIELD | ||
number_of_users: int = PRIVATE_FIELD | ||
sync_success_rate: float = PRIVATE_FIELD | ||
total_syncs_count: int = PRIVATE_FIELD | ||
failed_syncs_count: int = PRIVATE_FIELD | ||
succeeded_syncs_count: int = PRIVATE_FIELD | ||
is_eligible_for_promotion_to_cloud: bool = PUBLIC_FIELD | ||
report_generation_datetime: datetime = PUBLIC_FIELD | ||
|
||
class QAReport(BaseModel): | ||
connectors_qa_report: List[ConnectorQAReport] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
import pandas as pd | ||
|
||
from .models import ConnectorQAReport | ||
|
||
def persist_qa_report(qa_report: pd.DataFrame, path: str, public_fields_only: bool =True): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not called as I disabled GCS persistence in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍🏻 I assume we'll usually persist with all of the info, but can use the public-only if we plan to use it in the public repo anywhere |
||
final_fields = [ | ||
field.name for field in ConnectorQAReport.__fields__.values() | ||
if field.field_info.extra["is_public"] or not public_fields_only | ||
] | ||
qa_report[final_fields].to_json(path, orient="records") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,6 @@ | |
import requests | ||
|
||
from .constants import INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS | ||
from .inputs import OSS_CATALOG | ||
from .models import ConnectorQAReport, QAReport | ||
|
||
TRUTHY_COLUMNS_TO_BE_ELIGIBLE = [ | ||
|
@@ -37,7 +36,8 @@ def is_eligible_for_promotion_to_cloud(connector_qa_data: pd.Series) -> bool: | |
for col in TRUTHY_COLUMNS_TO_BE_ELIGIBLE | ||
]) | ||
|
||
def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: | ||
|
||
def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd.DataFrame: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Always a fan of bring only the data you need 👍 |
||
"""Perform validation steps on top of the enriched catalog. | ||
Adds the following columns: | ||
- documentation_is_available: | ||
|
@@ -54,6 +54,7 @@ def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: | |
Get the sync success rate of the connections with this connector version from our datawarehouse. | ||
Args: | ||
enriched_catalog (pd.DataFrame): The enriched catalog. | ||
oss_catalog_length (pd.DataFrame): The length of the OSS catalog, for sanity check. | ||
|
||
Returns: | ||
pd.DataFrame: The final QA report. | ||
|
@@ -64,9 +65,9 @@ def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: | |
|
||
# TODO YET TO IMPLEMENT VALIDATIONS | ||
qa_report["latest_build_is_successful"] = False # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21720 | ||
qa_report["number_of_connections"] = 0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 | ||
qa_report["number_of_users"] = 0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 | ||
qa_report["sync_success_rate"] = .0 # TODO, tracked in https://github.com/airbytehq/airbyte/issues/21721 | ||
|
||
qa_report["is_eligible_for_promotion_to_cloud"] = qa_report.apply(is_eligible_for_promotion_to_cloud, axis="columns") | ||
qa_report["report_generation_datetime"] = datetime.utcnow() | ||
|
||
qa_report["is_eligible_for_promotion_to_cloud"] = qa_report.apply(is_eligible_for_promotion_to_cloud, axis="columns") | ||
qa_report["report_generation_datetime"] = datetime.utcnow() | ||
|
@@ -75,8 +76,8 @@ def get_qa_report(enriched_catalog: pd.DataFrame) -> pd.DataFrame: | |
qa_report= qa_report[[field.name for field in ConnectorQAReport.__fields__.values()]] | ||
# Validate the report structure with pydantic QAReport model. | ||
QAReport(connectors_qa_report=qa_report.to_dict(orient="records")) | ||
if len(qa_report) != len(OSS_CATALOG): | ||
raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.") | ||
if len(qa_report) != oss_catalog_length: | ||
raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.") | ||
return qa_report | ||
|
||
def get_connectors_eligible_for_cloud(qa_report: pd.DataFrame) -> Iterable[ConnectorQAReport]: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
from datetime import datetime | ||
import pandas as pd | ||
import pytest | ||
|
||
from ci_connector_ops.qa_engine.constants import OSS_CATALOG_URL, CLOUD_CATALOG_URL | ||
from ci_connector_ops.qa_engine.inputs import fetch_remote_catalog | ||
|
||
@pytest.fixture(scope="module") | ||
def oss_catalog(): | ||
return fetch_remote_catalog(OSS_CATALOG_URL) | ||
|
||
@pytest.fixture(scope="module") | ||
def cloud_catalog(): | ||
return fetch_remote_catalog(CLOUD_CATALOG_URL) | ||
|
||
@pytest.fixture(scope="module") | ||
def adoption_metrics_per_connector_version(): | ||
return pd.DataFrame([{ | ||
"connector_definition_id": "dfd88b22-b603-4c3d-aad7-3701784586b1", | ||
"connector_version": "2.0.0", | ||
"number_of_connections": 0, | ||
"number_of_users": 0, | ||
"succeeded_syncs_count": 0, | ||
"failed_syncs_count": 0, | ||
"total_syncs_count": 0, | ||
"sync_success_rate": 0.0, | ||
}]) | ||
|
||
@pytest.fixture | ||
def dummy_qa_report() -> pd.DataFrame: | ||
return pd.DataFrame([ | ||
{ | ||
"connector_type": "source", | ||
"connector_name": "test", | ||
"connector_technical_name": "source-test", | ||
"connector_definition_id": "foobar", | ||
"connector_version": "0.0.0", | ||
"release_stage": "alpha", | ||
"is_on_cloud": False, | ||
"is_appropriate_for_cloud_use": True, | ||
"latest_build_is_successful": True, | ||
"documentation_is_available": False, | ||
"number_of_connections": 0, | ||
"number_of_users": 0, | ||
"sync_success_rate": .99, | ||
"total_syncs_count": 0, | ||
"failed_syncs_count": 0, | ||
"succeeded_syncs_count": 0, | ||
"is_eligible_for_promotion_to_cloud": True, | ||
"report_generation_datetime": datetime.utcnow() | ||
} | ||
]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: might be nice to move this into a function instead of needing comment to explain what its doing: