From 5526276a2278f2a4cd2e96a764f88704de5a4262 Mon Sep 17 00:00:00 2001 From: ykurochkin Date: Fri, 15 Jan 2021 17:35:11 +0200 Subject: [PATCH 1/2] #1571 Issue: adopt Best practice for Google Sheets Source --- .../google_sheets_source.py | 19 +++++++++++-------- .../google_sheets_source/helpers.py | 2 +- .../integration_tests/integration_test.py | 16 ++-------------- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py index c6322f78012c0..caa935d6f8932 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py @@ -59,7 +59,9 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus: if err.resp.status == status_codes.NOT_FOUND: reason = "Requested spreadsheet was not found." logger.error(f"Formatted error: {reason}") - return AirbyteConnectionStatus(status=Status.FAILED, message=str(reason)) + return AirbyteConnectionStatus( + status=Status.FAILED, message=f"Unable to connect with the provided credentials to spreadsheet. Error: {reason}" + ) return AirbyteConnectionStatus(status=Status.SUCCEEDED) @@ -72,9 +74,12 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog: sheet_names = [sheet.properties.title for sheet in spreadsheet_metadata.sheets] streams = [] for sheet_name in sheet_names: - header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name) - stream = Helpers.headers_to_airbyte_stream(sheet_name, header_row_data) - streams.append(stream) + try: + header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name) + stream = Helpers.headers_to_airbyte_stream(sheet_name, header_row_data) + streams.append(stream) + except Exception as err: + logger.error(str(err)) return AirbyteCatalog(streams=streams) except errors.HttpError as err: @@ -99,8 +104,7 @@ def read( logger.info(f"Syncing sheet {sheet}") column_index_to_name = sheet_to_column_index_to_name[sheet] row_cursor = 2 # we start syncing past the header row - encountered_blank_row = False - while not encountered_blank_row: + while True: range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}" logger.info(f"Fetching range {range}") row_batch = SpreadsheetValues.parse_obj( @@ -119,8 +123,7 @@ def read( for row in row_values: if Helpers.is_row_empty(row): - encountered_blank_row = True - break + continue elif Helpers.row_contains_relevant_data(row, column_index_to_name.keys()): yield AirbyteMessage(type=Type.RECORD, record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name)) logger.info(f"Finished syncing spreadsheet {spreadsheet_id}") diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py index 3a00d6010708e..236817efa38d7 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/helpers.py @@ -99,7 +99,7 @@ def get_first_row(client, spreadsheet_id: str, sheet_name: str) -> List[str]: raise Exception(f"Expected data for exactly one range for sheet {sheet_name}") all_row_data = range_data[0].rowData - if len(all_row_data) != 1: + if not all_row_data or len(all_row_data) != 1: raise Exception(f"Expected data for exactly one row for sheet {sheet_name}") first_row_data = all_row_data[0] diff --git a/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py index 8abc030e0b1f9..be4ff1fa4f547 100644 --- a/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py +++ b/airbyte-integrations/connectors/source-google-sheets/integration_tests/integration_test.py @@ -28,8 +28,7 @@ from pathlib import Path from typing import Dict -from airbyte_protocol import ConfiguredAirbyteCatalog, ConnectorSpecification -from base_python_test import StandardSourceTestIface +from base_python_test import DefaultStandardSourceTest from google_sheets_source.client import GoogleSheetsClient from google_sheets_source.helpers import Helpers from google_sheets_source.models.spreadsheet import Spreadsheet @@ -42,22 +41,11 @@ ] -class GoogleSheetsSourceStandardTest(StandardSourceTestIface): - def __init__(self): - pass - - def get_spec(self) -> ConnectorSpecification: - raw_spec = pkgutil.get_data(self.__class__.__module__.split(".")[0], "spec.json") - return ConnectorSpecification.parse_obj(json.loads(raw_spec)) - +class GoogleSheetsSourceStandardTest(DefaultStandardSourceTest): def get_config(self) -> object: config = {"credentials_json": json.dumps(self._get_creds()), "spreadsheet_id": self._get_spreadsheet_id()} return config - def get_catalog(self) -> ConfiguredAirbyteCatalog: - raw_catalog = pkgutil.get_data(self.__class__.__module__.split(".")[0], "configured_catalog.json") - return ConfiguredAirbyteCatalog.parse_obj(json.loads(raw_catalog)) - def setup(self) -> None: Path(self._get_tmp_dir()).mkdir(parents=True, exist_ok=True) From a866b11aa8dd3d7620176d77e30f6c84767f4efb Mon Sep 17 00:00:00 2001 From: Sherif Nada Date: Mon, 18 Jan 2021 10:24:16 -0800 Subject: [PATCH 2/2] bump --- .../71607ba1-c0ac-4799-8049-7f4b90dd50f7.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-integrations/connectors/source-google-sheets/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json index 6d415abe86f62..b59e42fc9d0ac 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/71607ba1-c0ac-4799-8049-7f4b90dd50f7.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "71607ba1-c0ac-4799-8049-7f4b90dd50f7", "name": "Google Sheets", "dockerRepository": "airbyte/source-google-sheets", - "dockerImageTag": "0.1.5", + "dockerImageTag": "0.1.6", "documentationUrl": "https://hub.docker.com/repository/docker/airbyte/source-google-sheets" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7b9ff2f9ab2d2..fdfd3810126bd 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -46,7 +46,7 @@ - sourceDefinitionId: 71607ba1-c0ac-4799-8049-7f4b90dd50f7 name: Google Sheets dockerRepository: airbyte/source-google-sheets - dockerImageTag: 0.1.5 + dockerImageTag: 0.1.6 documentationUrl: https://hub.docker.com/repository/docker/airbyte/source-google-sheets - sourceDefinitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad name: MySQL diff --git a/airbyte-integrations/connectors/source-google-sheets/Dockerfile b/airbyte-integrations/connectors/source-google-sheets/Dockerfile index 55f8809122818..67cb89a989486 100644 --- a/airbyte-integrations/connectors/source-google-sheets/Dockerfile +++ b/airbyte-integrations/connectors/source-google-sheets/Dockerfile @@ -11,5 +11,5 @@ COPY $CODE_PATH ./$CODE_PATH COPY setup.py ./ RUN pip install . -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.6 LABEL io.airbyte.name=airbyte/source-google-sheets