From 51582e8be97c47a01dacb9781a9adbb4fe4f6f77 Mon Sep 17 00:00:00 2001 From: Arthur Galuza Date: Wed, 13 Oct 2021 23:02:22 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20Source=20Google=20Sheets:=20Supp?= =?UTF-8?q?ort=20connecting=20via=20oAuth=20webflow=20(#6354)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add service account support * Upd oauth support * Upd auth creds selector --- .github/workflows/publish-command.yml | 2 + .github/workflows/test-command.yml | 2 + .../71607ba1-c0ac-4799-8049-7f4b90dd50f7.json | 2 +- .../source-google-sheets/Dockerfile | 2 +- .../acceptance-test-config.yml | 12 ++-- .../google_sheets_source.py | 15 +++- .../google_sheets_source/helpers.py | 9 ++- .../google_sheets_source/spec.json | 72 +++++++++++++++++-- .../integration_tests/invalid_config.json | 9 ++- .../sample_files/sample_config.json | 9 ++- docs/integrations/sources/google-sheets.md | 26 +++---- tools/bin/ci_credentials.sh | 2 + 12 files changed, 128 insertions(+), 34 deletions(-) diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 78e58ca6f754..a5e9b8e4e6c1 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -113,6 +113,8 @@ jobs: GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }} GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }} + GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC }} + GOOGLE_SHEETS_TESTS_CREDS_OLD: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_OLD }} GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS: ${{ secrets.GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS }} GREENHOUSE_TEST_CREDS: ${{ secrets.GREENHOUSE_TEST_CREDS }} GREENHOUSE_TEST_CREDS_LIMITED: ${{ secrets.GREENHOUSE_TEST_CREDS_LIMITED }} diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 3d76ab12e8d5..31c5d3cba9f9 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -108,6 +108,8 @@ jobs: GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }} GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }} + GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC }} + GOOGLE_SHEETS_TESTS_CREDS_OLD: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_OLD }} GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS: ${{ secrets.GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS }} GREENHOUSE_TEST_CREDS: ${{ secrets.GREENHOUSE_TEST_CREDS }} GREENHOUSE_TEST_CREDS_LIMITED: ${{ secrets.GREENHOUSE_TEST_CREDS_LIMITED }} diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json index 91f1b055ba4b..1516a7c8d95b 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "71607ba1-c0ac-4799-8049-7f4b90dd50f7", "name": "Google Sheets", "dockerRepository": "airbyte/source-google-sheets", - "dockerImageTag": "0.2.5", + "dockerImageTag": "0.2.6", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-sheets", "icon": "google-sheets.svg" } diff --git a/airbyte-integrations/connectors/source-google-sheets/Dockerfile b/airbyte-integrations/connectors/source-google-sheets/Dockerfile index f4a0a34fe83e..cc933146063b 100644 --- a/airbyte-integrations/connectors/source-google-sheets/Dockerfile +++ b/airbyte-integrations/connectors/source-google-sheets/Dockerfile @@ -13,5 +13,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh" -LABEL io.airbyte.version=0.2.5 +LABEL io.airbyte.version=0.2.6 LABEL io.airbyte.name=airbyte/source-google-sheets diff --git a/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml index 0c30d0a54f49..000110eb08bf 100644 --- a/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml @@ -7,16 +7,20 @@ tests: connection: - config_path: "secrets/config.json" status: "succeed" + - config_path: "secrets/service_config.json" + status: "succeed" + - config_path: "secrets/old_config.json" + status: "succeed" - config_path: "integration_tests/invalid_config.json" - status: "failed" + status: "exception" discovery: - - config_path: "secrets/config.json" + - config_path: "secrets/service_config.json" basic_read: # Sometimes test could fail (on weekends) because transactions could temporary disappear from Paypal Sandbox account - - config_path: "secrets/config.json" + - config_path: "secrets/service_config.json" configured_catalog_path: "integration_tests/configured_catalog.json" full_refresh: - - config_path: "secrets/config.json" + - config_path: "secrets/service_config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py index e4df0541f54b..91b4339683d8 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py @@ -30,7 +30,7 @@ def __init__(self): def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus: # Check involves verifying that the specified spreadsheet is reachable with our credentials. try: - client = GoogleSheetsClient(json.loads(config["credentials_json"])) + client = GoogleSheetsClient(self.get_credentials(config)) except Exception as e: return AirbyteConnectionStatus(status=Status.FAILED, message=f"Please use valid credentials json file. Error: {e}") @@ -85,7 +85,7 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus: return AirbyteConnectionStatus(status=Status.SUCCEEDED) def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog: - client = GoogleSheetsClient(json.loads(config["credentials_json"])) + client = GoogleSheetsClient(self.get_credentials(config)) spreadsheet_id = config["spreadsheet_id"] try: logger.info(f"Running discovery on sheet {spreadsheet_id}") @@ -113,7 +113,7 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog: def read( self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any] ) -> Generator[AirbyteMessage, None, None]: - client = GoogleSheetsClient(json.loads(config["credentials_json"])) + client = GoogleSheetsClient(self.get_credentials(config)) sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog) spreadsheet_id = config["spreadsheet_id"] @@ -153,3 +153,12 @@ def read( if not Helpers.is_row_empty(row) and Helpers.row_contains_relevant_data(row, column_index_to_name.keys()): yield AirbyteMessage(type=Type.RECORD, record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name)) logger.info(f"Finished syncing spreadsheet {spreadsheet_id}") + + @staticmethod + def get_credentials(config): + # backward compatible with old style config + if config.get("credentials_json"): + credentials = {"auth_type": "Service", "service_account_info": config.get("credentials_json")} + return credentials + + return config.get("credentials") diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py index ba6e0059d074..6afcb134084c 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py @@ -2,13 +2,14 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # - +import json from collections import defaultdict from datetime import datetime from typing import Dict, FrozenSet, Iterable, List from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog from base_python import AirbyteLogger +from google.oauth2 import credentials as client_account from google.oauth2 import service_account from googleapiclient import discovery @@ -30,7 +31,11 @@ def get_authenticated_drive_client(credentials: Dict[str, str], scopes: List[str @staticmethod def get_authenticated_google_credentials(credentials: Dict[str, str], scopes: List[str] = SCOPES): - return service_account.Credentials.from_service_account_info(credentials, scopes=scopes) + auth_type = credentials.pop("auth_type") + if auth_type == "Service": + return service_account.Credentials.from_service_account_info(json.loads(credentials["service_account_info"]), scopes=scopes) + elif auth_type == "Client": + return client_account.Credentials.from_authorized_user_info(info=credentials) @staticmethod def headers_to_airbyte_stream(logger: AirbyteLogger, sheet_name: str, header_row_values: List[str]) -> AirbyteStream: diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json index 23dcc4a8221c..a324bd130a8f 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/spec.json @@ -4,18 +4,78 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Stripe Source Spec", "type": "object", - "required": ["spreadsheet_id", "credentials_json"], - "additionalProperties": false, + "required": ["spreadsheet_id"], + "additionalProperties": true, "properties": { "spreadsheet_id": { "type": "string", "description": "The ID of the spreadsheet to be replicated." }, - "credentials_json": { - "type": "string", - "description": "The contents of the JSON service account key. See the docs for more information on how to generate this key.", - "airbyte_secret": true + "credentials": { + "type": "object", + "oneOf": [ + { + "title": "Authenticate via Google (Oauth)", + "type": "object", + "required": [ + "auth_type", + "client_id", + "client_secret", + "refresh_token" + ], + "properties": { + "auth_type": { + "type": "string", + "const": "Client" + }, + "client_id": { + "title": "Client ID", + "type": "string", + "description": "The Client ID of your developer application", + "airbyte_secret": true + }, + "client_secret": { + "title": "Client Secret", + "type": "string", + "description": "The client secret of your developer application", + "airbyte_secret": true + }, + "refresh_token": { + "title": "Refresh Token", + "type": "string", + "description": "A refresh token generated using the above client ID and secret", + "airbyte_secret": true + } + } + }, + { + "title": "Service Account Key Authentication", + "type": "object", + "required": ["auth_type", "service_account_info"], + "properties": { + "auth_type": { + "type": "string", + "const": "Service" + }, + "service_account_info": { + "type": "string", + "description": "The JSON key of the service account to use for authorization", + "examples": [ + "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID, \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + ] + } + } + } + ] } } + }, + "authSpecification": { + "auth_type": "oauth2.0", + "oauth2Specification": { + "rootObject": ["credentials", 0], + "oauthFlowInitParameters": [["client_id"], ["client_secret"]], + "oauthFlowOutputParameters": [["refresh_token"]] + } } } diff --git a/airbyte-integrations/connectors/source-google-sheets/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-google-sheets/integration_tests/invalid_config.json index 3313d848ee4c..fc1c0d91cf39 100644 --- a/airbyte-integrations/connectors/source-google-sheets/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-google-sheets/integration_tests/invalid_config.json @@ -1,4 +1,9 @@ { - "spreadsheet_id": "randomid", - "credentials_json": "{\"type\": \"service_account\",\"project_id\": \"airbyte-310409\",\"private_key_id\": \"xyz\",\"private_key\": \"-----BEGIN PRIVATE KEY-----\\n ... -----END PRIVATE KEY-----\\n\",\"client_email\": \"airbyte@airbyte-123456.iam.gserviceaccount.com\",\"client_id\": \"121512124\",\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\"token_uri\": \"https://oauth2.googleapis.com/token\",\"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/airbyte%40airbyte-123456.iam.gserviceaccount.com\"}" + "spreadsheet_id": "invalid_spreadsheet_id", + "credentials": { + "auth_type": "Client", + "client_id": "invalid_id", + "client_secret": "invalid_secret", + "refresh_token": "invalid_token" + } } diff --git a/airbyte-integrations/connectors/source-google-sheets/sample_files/sample_config.json b/airbyte-integrations/connectors/source-google-sheets/sample_files/sample_config.json index 3313d848ee4c..3bccc0560665 100644 --- a/airbyte-integrations/connectors/source-google-sheets/sample_files/sample_config.json +++ b/airbyte-integrations/connectors/source-google-sheets/sample_files/sample_config.json @@ -1,4 +1,9 @@ { - "spreadsheet_id": "randomid", - "credentials_json": "{\"type\": \"service_account\",\"project_id\": \"airbyte-310409\",\"private_key_id\": \"xyz\",\"private_key\": \"-----BEGIN PRIVATE KEY-----\\n ... -----END PRIVATE KEY-----\\n\",\"client_email\": \"airbyte@airbyte-123456.iam.gserviceaccount.com\",\"client_id\": \"121512124\",\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\"token_uri\": \"https://oauth2.googleapis.com/token\",\"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/airbyte%40airbyte-123456.iam.gserviceaccount.com\"}" + "spreadsheet_id": "random_id", + "credentials": { + "auth_type": "Client", + "client_id": "CLIENT_ID", + "client_secret": "CLIENT_SECRET", + "refresh_token": "CLIENT_REFRESH_TOKEN" + } } diff --git a/docs/integrations/sources/google-sheets.md b/docs/integrations/sources/google-sheets.md index bdcb43bd348d..d5ca723e6bb9 100644 --- a/docs/integrations/sources/google-sheets.md +++ b/docs/integrations/sources/google-sheets.md @@ -86,16 +86,16 @@ The Airbyte UI will ask for two things: ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full\_refresh test by adding supported\_sync\_modes to Stream initialization | -| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only | -| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE\_ENTRYPOINT for Kubernetes support | -| 0.2.2 | 2021-04-20 | [2994](https://github.com/airbytehq/airbyte/pull/2994) | Formatting spec | -| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.7 | 2021-01-21 | [1762](https://github.com/airbytehq/airbyte/pull/1762) | Fix issue large spreadsheet | -| 0.1.6 | 2021-01-27 | [1668](https://github.com/airbytehq/airbyte/pull/1668) | Adopt connector best practices | -| 0.1.5 | 2020-12-30 | [1438](https://github.com/airbytehq/airbyte/pull/1438) | Implement backoff | -| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | - +| Version | Date | Pull Request | Subject | +| :------ | :-------- | :----- | :------ | +| 0.2.6 | 2021-09-27 | [6354](https://github.com/airbytehq/airbyte/pull/6354) | Support connecting via Oauth webflow | +| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full_refresh test by adding supported_sync_modes to Stream initialization | +| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only | +| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | +| 0.2.2 | 2021-04-20 | [2994](https://github.com/airbytehq/airbyte/pull/2994) | Formatting spec | +| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | +| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | +| 0.1.7 | 2021-01-21 | [1762](https://github.com/airbytehq/airbyte/pull/1762) | Fix issue large spreadsheet | +| 0.1.6 | 2021-01-27 | [1668](https://github.com/airbytehq/airbyte/pull/1668) | Adopt connector best practices | +| 0.1.5 | 2020-12-30 | [1438](https://github.com/airbytehq/airbyte/pull/1438) | Implement backoff | +| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 0ee602c5c44b..86498ee35a15 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -81,6 +81,8 @@ write_standard_creds source-google-directory "$GOOGLE_DIRECTORY_TEST_CREDS" write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS" write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC" "service_account_config.json" write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS" +write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC" "service_config.json" +write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS_OLD" "old_config.json" write_standard_creds source-google-workspace-admin-reports "$GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS" write_standard_creds source-greenhouse "$GREENHOUSE_TEST_CREDS" write_standard_creds source-greenhouse "$GREENHOUSE_TEST_CREDS_LIMITED" "config_users_only.json"