Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 Source Google Sheets: Support connecting via oAuth webflow #6354

Merged
merged 9 commits into from
Oct 13, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/publish-command.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ jobs:
GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }}
GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }}
GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }}
GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC }}
GOOGLE_SHEETS_TESTS_CREDS_OLD: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_OLD }}
GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS: ${{ secrets.GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS }}
GREENHOUSE_TEST_CREDS: ${{ secrets.GREENHOUSE_TEST_CREDS }}
GREENHOUSE_TEST_CREDS_LIMITED: ${{ secrets.GREENHOUSE_TEST_CREDS_LIMITED }}
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/test-command.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ jobs:
GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }}
GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }}
GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }}
GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC }}
GOOGLE_SHEETS_TESTS_CREDS_OLD: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS_OLD }}
GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS: ${{ secrets.GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS }}
GREENHOUSE_TEST_CREDS: ${{ secrets.GREENHOUSE_TEST_CREDS }}
GREENHOUSE_TEST_CREDS_LIMITED: ${{ secrets.GREENHOUSE_TEST_CREDS_LIMITED }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"sourceDefinitionId": "71607ba1-c0ac-4799-8049-7f4b90dd50f7",
"name": "Google Sheets",
"dockerRepository": "airbyte/source-google-sheets",
"dockerImageTag": "0.2.5",
"dockerImageTag": "0.2.6",
"documentationUrl": "https://docs.airbyte.io/integrations/sources/google-sheets",
"icon": "google-sheets.svg"
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ RUN pip install .

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"

LABEL io.airbyte.version=0.2.5
LABEL io.airbyte.version=0.2.6
LABEL io.airbyte.name=airbyte/source-google-sheets
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,20 @@ tests:
connection:
- config_path: "secrets/config.json"
status: "succeed"
- config_path: "secrets/service_config.json"
status: "succeed"
- config_path: "secrets/old_config.json"
status: "succeed"
- config_path: "integration_tests/invalid_config.json"
status: "failed"
status: "exception"
discovery:
- config_path: "secrets/config.json"
- config_path: "secrets/service_config.json"
basic_read:
# Sometimes the test could fail (on weekends) because transactions could temporarily disappear from the PayPal Sandbox account
- config_path: "secrets/config.json"
- config_path: "secrets/service_config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"
full_refresh:
- config_path: "secrets/config.json"
- config_path: "secrets/service_config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"


Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(self):
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
# Check involves verifying that the specified spreadsheet is reachable with our credentials.
try:
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
client = GoogleSheetsClient(self.get_credentials(config))
except Exception as e:
return AirbyteConnectionStatus(status=Status.FAILED, message=f"Please use valid credentials json file. Error: {e}")

Expand Down Expand Up @@ -85,7 +85,7 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
return AirbyteConnectionStatus(status=Status.SUCCEEDED)

def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
client = GoogleSheetsClient(self.get_credentials(config))
spreadsheet_id = config["spreadsheet_id"]
try:
logger.info(f"Running discovery on sheet {spreadsheet_id}")
Expand Down Expand Up @@ -113,7 +113,7 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
def read(
self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
client = GoogleSheetsClient(self.get_credentials(config))

sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
spreadsheet_id = config["spreadsheet_id"]
Expand Down Expand Up @@ -153,3 +153,12 @@ def read(
if not Helpers.is_row_empty(row) and Helpers.row_contains_relevant_data(row, column_index_to_name.keys()):
yield AirbyteMessage(type=Type.RECORD, record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name))
logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")

@staticmethod
def get_credentials(config):
    """Build the credentials mapping for the Google API client from a connector config.

    Two config layouts are supported:

    * Old style: a top-level ``credentials_json`` string holding the service
      account key. It is wrapped as
      ``{"auth_type": "Service", "service_account_info": <that string>}``.
    * New style: a nested ``credentials`` object, which is returned as a
      shallow copy.

    :param config: the connector configuration mapping.
    :return: the credentials mapping, or ``None`` when the config contains
        neither ``credentials_json`` nor ``credentials``.
    """
    # Backward compatibility with the old-style config: a non-empty
    # ``credentials_json`` takes precedence over ``credentials``.
    legacy_service_key = config.get("credentials_json")
    if legacy_service_key:
        return {"auth_type": "Service", "service_account_info": legacy_service_key}

    credentials = config.get("credentials")
    # Hand out a copy so that a consumer removing keys (e.g. popping
    # ``auth_type``) cannot alter the caller's config as a side effect.
    if isinstance(credentials, dict):
        return dict(credentials)
    return credentials
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#


import json
from collections import defaultdict
from datetime import datetime
from typing import Dict, FrozenSet, Iterable, List

from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog
from base_python import AirbyteLogger
from google.oauth2 import credentials as client_account
from google.oauth2 import service_account
from googleapiclient import discovery

Expand All @@ -30,7 +31,10 @@ def get_authenticated_drive_client(credentials: Dict[str, str], scopes: List[str

@staticmethod
def get_authenticated_google_credentials(credentials: Dict[str, str], scopes: List[str] = SCOPES):
    """Create a Google credentials object for the configured authentication type.

    :param credentials: mapping with an ``auth_type`` key. For ``"Service"`` it
        must also hold ``service_account_info``, a JSON string with the service
        account key. For ``"Client"`` the remaining keys are passed as the
        authorized-user info.
    :param scopes: scopes requested for the service account credentials. They
        are not passed on for the ``"Client"`` type.
    :return: the object returned by
        ``service_account.Credentials.from_service_account_info`` or
        ``client_account.Credentials.from_authorized_user_info``.
    :raises ValueError: if ``auth_type`` is missing or not one of the supported
        values.
    """
    # Work on a copy: removing ``auth_type`` from the caller's dict would make
    # a second call with the same mapping fail.
    auth_params = dict(credentials)
    auth_type = auth_params.pop("auth_type", None)

    if auth_type == "Service":
        service_account_info = json.loads(auth_params["service_account_info"])
        return service_account.Credentials.from_service_account_info(service_account_info, scopes=scopes)
    if auth_type == "Client":
        return client_account.Credentials.from_authorized_user_info(info=auth_params)

    # Fail loudly instead of returning None for an unrecognised auth type.
    raise ValueError(f"Unsupported auth_type {auth_type!r}; expected 'Service' or 'Client'.")
Copy link
Contributor

@keu keu Oct 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be wrong, but what will happen here if the first comparison fails?
If auth_type has already been removed, would it always return None?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I read it as two pops, but nevertheless, can't we pop auth_type first and then do the if/else logic?


@staticmethod
def headers_to_airbyte_stream(logger: AirbyteLogger, sheet_name: str, header_row_values: List[str]) -> AirbyteStream:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,81 @@
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Stripe Source Spec",
"type": "object",
"required": ["spreadsheet_id", "credentials_json"],
"additionalProperties": false,
"required": ["spreadsheet_id"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is "credentials" not required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to keep backward compatibility with the old-style config; credentials are still required for auth through the UI.

"additionalProperties": true,
"properties": {
"spreadsheet_id": {
"type": "string",
"description": "The ID of the spreadsheet to be replicated."
},
"credentials_json": {
"type": "string",
"description": "The contents of the JSON service account key. See the <a href=\"https://docs.airbyte.io/integrations/sources/google-sheets\">docs</a> for more information on how to generate this key.",
"airbyte_secret": true
"credentials": {
"type": "object",
"oneOf": [
{
"title": "Authenticate via Google (Oauth)",
"type": "object",
"required": [
"auth_type",
"client_id",
"client_secret",
"refresh_token"
],
"properties": {
"auth_type": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think auth_type shouldn't be part of the specification. It should be determined in code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vitaliizazmic this practice is used in the other connectors; could you point to some code with a new approach?

"type": "string",
"const": "Client"
},
"client_id": {
"title": "Client ID",
"type": "string",
"description": "The Client ID of your developer application",
"airbyte_secret": true
},
"client_secret": {
"title": "Client Secret",
"type": "string",
"description": "The client secret of your developer application",
"airbyte_secret": true
},
"refresh_token": {
"title": "Refresh Token",
"type": "string",
"description": "A refresh token generated using the above client ID and secret",
"airbyte_secret": true
}
}
},
{
"title": "Service Account Key Authentication",
"type": "object",
"required": ["auth_type", "service_account_info"],
"properties": {
"auth_type": {
"type": "string",
"const": "Service"
},
"service_account_info": {
"type": "string",
"description": "The JSON key of the service account to use for authorization",
"examples": [
"{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID, \"private_key_id\": YOUR_PRIVATE_KEY, ... }"
]
}
}
}
]
}
}
},
"authSpecification": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

based on #6456

"auth_type": "oauth2.0",
"oauth2Specification": {
"rootObject": ["credentials", 0],
"oauthFlowInitParameters": [
["client_id"],
["client_secret"],
["refresh_token"]
]
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
{
"spreadsheet_id": "randomid",
"credentials_json": "{\"type\": \"service_account\",\"project_id\": \"airbyte-310409\",\"private_key_id\": \"xyz\",\"private_key\": \"-----BEGIN PRIVATE KEY-----\\n ... -----END PRIVATE KEY-----\\n\",\"client_email\": \"[email protected]\",\"client_id\": \"121512124\",\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\"token_uri\": \"https://oauth2.googleapis.com/token\",\"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/airbyte%40airbyte-123456.iam.gserviceaccount.com\"}"
"spreadsheet_id": "invalid_spreadsheet_id",
"credentials": {
"auth_type": "Client",
"client_id": "invalid_id",
"client_secret": "invalid_secret",
"refresh_token": "invalid_token"
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
{
"spreadsheet_id": "randomid",
"credentials_json": "{\"type\": \"service_account\",\"project_id\": \"airbyte-310409\",\"private_key_id\": \"xyz\",\"private_key\": \"-----BEGIN PRIVATE KEY-----\\n ... -----END PRIVATE KEY-----\\n\",\"client_email\": \"[email protected]\",\"client_id\": \"121512124\",\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\"token_uri\": \"https://oauth2.googleapis.com/token\",\"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/airbyte%40airbyte-123456.iam.gserviceaccount.com\"}"
"spreadsheet_id": "random_id",
"credentials": {
"auth_type": "Client",
"client_id": "CLIENT_ID",
"client_secret": "CLIENT_SECRET",
"refresh_token": "CLIENT_REFRESH_TOKEN"
}
}
1 change: 1 addition & 0 deletions docs/integrations/sources/google-sheets.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ The Airbyte UI will ask for two things:

| Version | Date | Pull Request | Subject |
| :------ | :-------- | :----- | :------ |
| 0.2.6 | 2021-09-27 | [6354](https://github.com/airbytehq/airbyte/pull/6354) | Support connecting via Oauth webflow |
| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full_refresh test by adding supported_sync_modes to Stream initialization |
| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only |
| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support |
Expand Down
2 changes: 2 additions & 0 deletions tools/bin/ci_credentials.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ write_standard_creds source-google-directory "$GOOGLE_DIRECTORY_TEST_CREDS"
write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS"
write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC" "service_account_config.json"
write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS"
write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS_SRV_ACC" "service_config.json"
write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS_OLD" "old_config.json"
write_standard_creds source-google-workspace-admin-reports "$GOOGLE_WORKSPACE_ADMIN_REPORTS_TEST_CREDS"
write_standard_creds source-greenhouse "$GREENHOUSE_TEST_CREDS"
write_standard_creds source-greenhouse "$GREENHOUSE_TEST_CREDS_LIMITED" "config_users_only.json"
Expand Down