
Commit

Merge branch 'master' into grubberr/11651-source-github
grubberr committed Apr 4, 2022
2 parents b76b8d3 + 865400e commit 4c38e00
Showing 38 changed files with 428 additions and 226 deletions.
3 changes: 3 additions & 0 deletions airbyte-cdk/python/CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 0.1.53
Remove false-positive error logging during the send process.

## 0.1.52
Fix BaseBackoffException constructor

9 changes: 6 additions & 3 deletions airbyte-cdk/python/airbyte_cdk/sources/streams/http/http.py
@@ -7,6 +7,7 @@
import os
from abc import ABC, abstractmethod
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
from urllib.error import HTTPError
from urllib.parse import urljoin

import requests
@@ -294,9 +295,11 @@ def _send(self, request: requests.PreparedRequest, request_kwargs: Mapping[str,
raise DefaultBackoffException(request=request, response=response)
elif self.raise_on_http_errors:
# Raise any HTTP exceptions that happened in case there were unexpected ones
self.logger.error(f"Request raised an error with response: {response.text}")
response.raise_for_status()

try:
response.raise_for_status()
except HTTPError as exc:
self.logger.error(response.text)
raise exc
return response

def _send_request(self, request: requests.PreparedRequest, request_kwargs: Mapping[str, Any]) -> requests.Response:
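The change narrows the error log to genuine HTTP failures: the old code logged `response.text` at error level before calling `raise_for_status()`, so the branch emitted an error even when the response was successful, which is the false positive the changelog refers to. A minimal sketch of the new control flow, catching `requests.exceptions.HTTPError` because that is the exception type `raise_for_status()` raises (the helper name and logger setup are illustrative, not the exact CDK code):

```python
import logging

import requests

logger = logging.getLogger("airbyte")


def check_response(response: requests.Response) -> requests.Response:
    # Log the body only when raise_for_status() actually raises; successful
    # responses pass through without producing a spurious error log.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error(response.text)
        raise
    return response
```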
2 changes: 1 addition & 1 deletion airbyte-cdk/python/setup.py
@@ -15,7 +15,7 @@

setup(
name="airbyte-cdk",
version="0.1.52",
version="0.1.53",
description="A framework for writing Airbyte Connectors.",
long_description=README,
long_description_content_type="text/markdown",
18 changes: 18 additions & 0 deletions airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py
@@ -424,3 +424,21 @@ def test_using_cache(mocker):
pass

assert parent_stream.cassete.play_count != 0


class AutoFailTrueHttpStream(StubBasicReadHttpStream):
raise_on_http_errors = True


@pytest.mark.parametrize("status_code", range(400, 600))
def test_send_raise_on_http_errors_logs(mocker, status_code):
mocker.patch.object(AutoFailTrueHttpStream, "logger")
mocker.patch.object(AutoFailTrueHttpStream, "should_retry", mocker.Mock(return_value=False))
stream = AutoFailTrueHttpStream()
req = requests.Response()
req.status_code = status_code
mocker.patch.object(requests.Session, "send", return_value=req)
with pytest.raises(requests.exceptions.HTTPError):
response = stream._send_request(req, {})
stream.logger.error.assert_called_with(response.text)
assert response.status_code == status_code
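The new unit test drives every 4xx and 5xx status through `_send_request` by stubbing `requests.Session.send` and asserting that the error text is logged. A self-contained sketch of the same stubbing pattern, assuming the `pytest-mock` plugin provides the `mocker` fixture (a hypothetical test, not the committed CDK test):

```python
import pytest
import requests


@pytest.mark.parametrize("status_code", [400, 429, 500, 503])
def test_error_status_raises(mocker, status_code):
    # Build a canned Response and make every Session.send return it.
    fake_response = requests.Response()
    fake_response.status_code = status_code
    mocker.patch.object(requests.Session, "send", return_value=fake_response)

    session = requests.Session()
    prepared = requests.Request("GET", "https://example.com/test").prepare()
    response = session.send(prepared)

    assert response.status_code == status_code
    with pytest.raises(requests.exceptions.HTTPError):
        response.raise_for_status()
```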
airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
@@ -19,7 +19,7 @@
- name: BigQuery
destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133
dockerRepository: airbyte/destination-bigquery
dockerImageTag: 1.0.1
dockerImageTag: 1.0.2
documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery
icon: bigquery.svg
resourceRequirements:
@@ -31,7 +31,7 @@
- name: BigQuery (denormalized typed struct)
destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496
dockerRepository: airbyte/destination-bigquery-denormalized
dockerImageTag: 0.2.11
dockerImageTag: 0.2.14
documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery
icon: bigquery.svg
resourceRequirements:
@@ -203,7 +203,7 @@
- name: S3
destinationDefinitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362
dockerRepository: airbyte/destination-s3
dockerImageTag: 0.2.13
dockerImageTag: 0.3.0
documentationUrl: https://docs.airbyte.io/integrations/destinations/s3
icon: s3.svg
resourceRequirements:
airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -188,7 +188,7 @@
supportsDBT: false
supported_destination_sync_modes:
- "append"
- dockerImage: "airbyte/destination-bigquery:1.0.1"
- dockerImage: "airbyte/destination-bigquery:1.0.2"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
connectionSpecification:
@@ -398,7 +398,7 @@
- "overwrite"
- "append"
- "append_dedup"
- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.11"
- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.14"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
connectionSpecification:
@@ -3469,7 +3469,7 @@
supported_destination_sync_modes:
- "append"
- "overwrite"
- dockerImage: "airbyte/destination-s3:0.2.13"
- dockerImage: "airbyte/destination-s3:0.3.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3"
connectionSpecification:
@@ -3510,7 +3510,7 @@
\ bucket directory"
type: "string"
examples:
- "${NAMESPACE}/${STREAM_NAME}/"
- "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_"
order: 3
s3_bucket_region:
title: "S3 Bucket Region"
airbyte-config/init/src/main/resources/seed/source_definitions.yaml
@@ -273,7 +273,7 @@
- name: Google Analytics
sourceDefinitionId: eff3616a-f9c3-11eb-9a03-0242ac130003
dockerRepository: airbyte/source-google-analytics-v4
dockerImageTag: 0.1.16
dockerImageTag: 0.1.17
documentationUrl: https://docs.airbyte.io/integrations/sources/google-analytics-v4
icon: google-analytics.svg
sourceType: api
@@ -411,7 +411,7 @@
- name: LinkedIn Ads
sourceDefinitionId: 137ece28-5434-455c-8f34-69dc3782f451
dockerRepository: airbyte/source-linkedin-ads
dockerImageTag: 0.1.5
dockerImageTag: 0.1.6
documentationUrl: https://docs.airbyte.io/integrations/sources/linkedin-ads
icon: linkedin.svg
sourceType: api
15 changes: 9 additions & 6 deletions airbyte-config/init/src/main/resources/seed/source_specs.yaml
@@ -2754,7 +2754,7 @@
oauthFlowOutputParameters:
- - "access_token"
- - "refresh_token"
- dockerImage: "airbyte/source-google-analytics-v4:0.1.16"
- dockerImage: "airbyte/source-google-analytics-v4:0.1.17"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4"
connectionSpecification:
@@ -4289,7 +4289,7 @@
path_in_connector_config:
- "credentials"
- "client_secret"
- dockerImage: "airbyte/source-linkedin-ads:0.1.5"
- dockerImage: "airbyte/source-linkedin-ads:0.1.6"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/sources/linkedin-ads"
connectionSpecification:
@@ -4309,16 +4309,17 @@
examples:
- "2021-05-17"
account_ids:
title: "Account IDs"
title: "Account IDs (Optional)"
type: "array"
description: "Specify the Account IDs separated by space, to pull the data\
\ from. Leave empty, if you want to pull the data from all associated\
\ accounts."
\ accounts. See the <a href=\"https://www.linkedin.com/help/linkedin/answer/a424270/find-linkedin-ads-account-details?lang=en\"\
>official LinkedIn Ads docs</a> for more info."
items:
type: "integer"
default: []
credentials:
title: "Authorization Method"
title: "Authentication *"
type: "object"
oneOf:
- type: "object"
@@ -4357,7 +4358,9 @@
access_token:
type: "string"
title: "Access Token"
description: "The token value generated using Authentication Code."
description: "The token value generated using the authentication code.\
\ See the <a href=\"https://docs.airbyte.com/integrations/sources/linkedin-ads#authentication\"\
>docs</a> to obtain yours."
airbyte_secret: true
supportsNormalization: false
supportsDBT: false
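For reference, a hedged example of what a source-linkedin-ads configuration built from these fields might look like; the field names come from the spec above, while the values and the exact shape of the `credentials` object (for instance any auth-method discriminator) are assumptions:

```python
# Illustrative configuration only; not taken from the connector's documentation.
linkedin_ads_config = {
    "start_date": "2021-05-17",
    # Optional: an empty list pulls data from all associated accounts.
    "account_ids": [123456789, 987654321],
    "credentials": {
        "access_token": "<token generated using the authentication code>",
    },
}
```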
airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile
@@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.2.11
LABEL io.airbyte.version=0.2.14
LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized
airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json
@@ -12,8 +12,8 @@
"additionalProperties": true,
"properties": {
"big_query_client_buffer_size_mb": {
"title": "Google BigQuery client chunk size",
"description": "Google BigQuery client's chunk (buffer) size (MIN = 1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. It defaults to 15MiB. Smaller chunk size means less memory consumption, and is recommended for big data sets. For more details refer to the documentation <a href=\"https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html\">here</a>",
"title": "Google BigQuery Client Chunk Size (Optional)",
"description": "Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. Read more <a href=\"https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html\">here</a>.",
"type": "integer",
"minimum": 1,
"maximum": 15,
@@ -22,18 +22,18 @@
},
"project_id": {
"type": "string",
"description": "The GCP project ID for the project containing the target BigQuery dataset.",
"description": "The GCP project ID for the project containing the target BigQuery dataset. Read more <a href=\"https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects\">here</a>.",
"title": "Project ID"
},
"dataset_id": {
"type": "string",
"description": "Default BigQuery Dataset ID tables are replicated to if the source does not specify a namespace.",
"description": "The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more <a href=\"https://cloud.google.com/bigquery/docs/datasets#create-dataset\">here</a>.",
"title": "Default Dataset ID"
},
"dataset_location": {
"type": "string",
"description": "The location of the dataset. Warning: Changes made after creation will not be applied.",
"title": "Dataset Location",
"description": "The location of the dataset. Warning: Changes made after creation will not be applied. The default \"US\" value is used if not set explicitly. Read more <a href=\"https://cloud.google.com/bigquery/docs/locations\">here</a>.",
"title": "Dataset Location (Optional)",
"default": "US",
"enum": [
"US",
@@ -71,19 +71,18 @@
},
"credentials_json": {
"type": "string",
"description": "The contents of the JSON service account key. Check out the <a href=\"https://docs.airbyte.io/integrations/destinations/bigquery\">docs</a> if you need help generating this key. Default credentials will be used if this field is left empty.",
"title": "Credentials JSON",
"description": "The contents of the JSON service account key. Check out the <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#service-account-key\">docs</a> if you need help generating this key. Default credentials will be used if this field is left empty.",
"title": "Service Account Key JSON (Optional)",
"airbyte_secret": true
},
"loading_method": {
"type": "object",
"title": "Loading Method",
"description": "Select the way that data will be uploaded to BigQuery.",
"title": "Loading Method *",
"description": "Loading method used to send select the way data will be uploaded to BigQuery. <br><b>Standard Inserts</b> - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging. <br><b>GCS Staging</b> - Writes large batches of records to a file, uploads the file to GCS, then uses <b>COPY INTO table</b> to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#gcs-staging\">here</a>.",
"oneOf": [
{
"title": "Standard Inserts",
"additionalProperties": false,
"description": "Direct uploading using streams.",
"required": ["method"],
"properties": {
"method": {
@@ -95,7 +94,6 @@
{
"title": "GCS Staging",
"additionalProperties": false,
"description": "Writes large batches of records to a file, uploads the file to GCS, then uses <pre>COPY INTO table</pre> to upload the file. Recommended for large production workloads for better speed and scalability.",
"required": [
"method",
"gcs_bucket_name",
@@ -110,16 +108,17 @@
"gcs_bucket_name": {
"title": "GCS Bucket Name",
"type": "string",
"description": "The name of the GCS bucket.",
"description": "The name of the GCS bucket. Read more <a href=\"https://cloud.google.com/storage/docs/naming-buckets\">here</a>.",
"examples": ["airbyte_sync"]
},
"gcs_bucket_path": {
"title": "GCS Bucket Path",
"description": "Directory under the GCS bucket where data will be written.",
"type": "string",
"examples": ["data_sync/test"]
},
"part_size_mb": {
"title": "Block Size (MB) for GCS multipart upload",
"title": "Block Size (MB) for GCS Multipart Upload (Optional)",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
@@ -129,8 +128,8 @@
},
"keep_files_in_gcs-bucket": {
"type": "string",
"description": "This upload method is supposed to temporary store records in GCS bucket. What do you want to do with data in GCS bucket when migration has finished?",
"title": "GCS tmp files afterward processing",
"description": "This upload method is supposed to temporary store records in GCS bucket. What do you want to do with data in GCS bucket when migration has finished? The default \"Delete all tmp files from GCS\" value is used if not set explicitly.",
"title": "GCS Tmp Files Afterward Processing (Optional)",
"default": "Delete all tmp files from GCS",
"enum": [
"Delete all tmp files from GCS",
@@ -139,6 +138,7 @@
},
"credential": {
"title": "Credential",
"description": "An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys\">here</a>.",
"type": "object",
"oneOf": [
{
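To make the two `loading_method` options concrete, here is a hedged sketch of a destination configuration selecting GCS Staging. The field names come from the spec above; the "GCS Staging" method value is inferred from the option title, the `credential` object is omitted because its full schema is not shown in this excerpt, and all values are placeholders:

```python
# Illustrative configuration only; consult the connector spec for the full schema.
bigquery_denormalized_config = {
    "project_id": "my-gcp-project",
    "dataset_id": "airbyte_raw",
    "dataset_location": "US",
    "credentials_json": "<contents of the service account key JSON>",
    "loading_method": {
        "method": "GCS Staging",
        "gcs_bucket_name": "airbyte_sync",
        "gcs_bucket_path": "data_sync/test",
        "part_size_mb": 5,
        "keep_files_in_gcs-bucket": "Delete all tmp files from GCS",
        # "credential": {...}  # HMAC key credential; schema not shown above.
    },
}
```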
airbyte-integrations/connectors/destination-bigquery/Dockerfile
@@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=1.0.1
LABEL io.airbyte.version=1.0.2
LABEL io.airbyte.name=airbyte/destination-bigquery
airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json
@@ -22,7 +22,7 @@
},
"project_id": {
"type": "string",
"description": "The GCP project ID for the project containing the target BigQuery dataset. Read more <a href=\"https://cloud.google.com/iam/docs/creating-managing-service-accounts#creating\">here</a>.",
"description": "The GCP project ID for the project containing the target BigQuery dataset. Read more <a href=\"https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects\">here</a>.",
"title": "Project ID"
},
"dataset_id": {
@@ -72,7 +72,7 @@
"credentials_json": {
"type": "string",
"description": "The contents of the JSON service account key. Check out the <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#service-account-key\">docs</a> if you need help generating this key. Default credentials will be used if this field is left empty.",
"title": "Credentials JSON (Optional)",
"title": "Service Account Key JSON (Optional)",
"airbyte_secret": true
},
"transformation_priority": {
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/destination-s3/Dockerfile
@@ -16,5 +16,5 @@ ENV APPLICATION destination-s3

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.2.13
LABEL io.airbyte.version=0.3.0
LABEL io.airbyte.name=airbyte/destination-s3
airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3ConsumerFactory.java
@@ -80,14 +80,14 @@ private static Function<ConfiguredAirbyteStream, WriteConfig> toWriteConfig(
final AirbyteStream abStream = stream.getStream();
final String namespace = abStream.getNamespace();
final String streamName = abStream.getName();
final String bucketPath = config.get(BUCKET_PATH_FIELD).asText();
final String outputBucketPath = config.get(BUCKET_PATH_FIELD).asText();
final String customOutputFormat = String.join("/",
bucketPath,
outputBucketPath,
config.has(PATH_FORMAT_FIELD) && !config.get(PATH_FORMAT_FIELD).asText().isBlank() ? config.get(PATH_FORMAT_FIELD).asText()
: S3DestinationConstants.DEFAULT_PATH_FORMAT);
final String outputBucketPath = storageOperations.getBucketObjectPath(namespace, streamName, SYNC_DATETIME, customOutputFormat);
final String fullOutputPath = storageOperations.getBucketObjectPath(namespace, streamName, SYNC_DATETIME, customOutputFormat);
final DestinationSyncMode syncMode = stream.getDestinationSyncMode();
final WriteConfig writeConfig = new WriteConfig(namespace, streamName, outputBucketPath, syncMode);
final WriteConfig writeConfig = new WriteConfig(namespace, streamName, outputBucketPath, fullOutputPath, syncMode);
LOGGER.info("Write config: {}", writeConfig);
return writeConfig;
};
@@ -139,7 +139,7 @@ private CheckedBiConsumer<AirbyteStreamNameNamespacePair, SerializableBuffer, Ex
writer,
writeConfig.getNamespace(),
writeConfig.getStreamName(),
writeConfig.getOutputBucketPath()));
writeConfig.getFullOutputPath()));
} catch (final Exception e) {
LOGGER.error("Failed to flush and upload buffer to storage:", e);
throw new RuntimeException("Failed to upload buffer to storage", e);
@@ -153,7 +153,7 @@ private OnCloseFunction onCloseFunction(final BlobStorageOperations storageOpera
if (hasFailed) {
LOGGER.info("Cleaning up destination started for {} streams", writeConfigs.size());
for (final WriteConfig writeConfig : writeConfigs) {
storageOperations.cleanUpBucketObject(writeConfig.getOutputBucketPath(), writeConfig.getStoredFiles());
storageOperations.cleanUpBucketObject(writeConfig.getFullOutputPath(), writeConfig.getStoredFiles());
writeConfig.clearStoredFiles();
}
LOGGER.info("Cleaning up destination completed.");
airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java
@@ -12,7 +12,7 @@ public final class S3DestinationConstants {
public static final S3NameTransformer NAME_TRANSFORMER = new S3NameTransformer();
public static final String PART_SIZE_MB_ARG_NAME = "part_size_mb";
public static final int DEFAULT_PART_SIZE_MB = 10;
public static final String DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/";
public static final String DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_";

private S3DestinationConstants() {}

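The new default path format appends the sync date and epoch to each stream's prefix, which is the value the renamed `fullOutputPath` in `S3ConsumerFactory` ends up carrying. A rough Python sketch of how the placeholders might expand; the actual substitution is performed by the Java `getBucketObjectPath` implementation, and the zero-padding rules here are assumptions:

```python
from datetime import datetime, timezone

DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_"


def expand_path(namespace: str, stream: str, sync_dt: datetime, fmt: str = DEFAULT_PATH_FORMAT) -> str:
    # Substitute each ${...} macro from the sync's namespace, stream and datetime.
    return (
        fmt.replace("${NAMESPACE}", namespace)
        .replace("${STREAM_NAME}", stream)
        .replace("${YEAR}", f"{sync_dt.year:04d}")
        .replace("${MONTH}", f"{sync_dt.month:02d}")
        .replace("${DAY}", f"{sync_dt.day:02d}")
        .replace("${EPOCH}", str(int(sync_dt.timestamp())))
    )


# e.g. "public/users/2022_04_04_1649030400_"
print(expand_path("public", "users", datetime(2022, 4, 4, tzinfo=timezone.utc)))
```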

1 comment on commit 4c38e00

@github-actions
SonarQube Report

SonarQube report for Airbyte Connectors Source Github (#11678)

Measures

Duplicated Blocks: 5
Vulnerabilities: 0
Security Rating: A
Reliability Rating: A
Lines to Cover: 665
Bugs: 0
Lines of Code: 881
Code Smells: 4
Duplicated Lines (%): 4.8
Quality Gate Status: OK
Coverage: 78.3
Blocker Issues: 0
Critical Issues: 2
Major Issues: 2
Minor Issues: 0

Detected Issues

Rule | File | Description | Message
python:S3776 (CRITICAL) | source_github/streams.py:90 | Cognitive Complexity of functions should not be too high | Refactor this function to reduce its Cognitive Complexity from 18 to the 15 allowed.
python:S5886 (MAJOR) | source_github/streams.py:83 | Function return types should be consistent with their type hint | Return a value of type "Union[int, float]" instead of "NoneType" or update function "backoff_time" type hint.
python:S5797 (CRITICAL) | fixtures/github.py:79 | Constants should not be used as conditions | Replace this expression; used as a condition it will always be constant.
python:S112 (MAJOR) | source_github/source.py:75 | "Exception" and "BaseException" should not be raised | Replace this generic exception class with a more specific one.
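Most of these findings have mechanical fixes. For example, the S5886 finding on `backoff_time` is typically resolved by widening the return type hint to admit `None`; a generic sketch, not the actual source-github code:

```python
from typing import Optional, Union

import requests


class ExampleStream:
    def backoff_time(self, response: requests.Response) -> Optional[Union[int, float]]:
        # Returning None signals "use the default backoff", so the hint must
        # admit None as well as numeric values.
        retry_after = response.headers.get("Retry-After")
        return float(retry_after) if retry_after else None
```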

Coverage (78.3%)

fixtures/github.py: 0.0
fixtures/main.py: 0.0
source_github/__init__.py: 100.0
source_github/source.py: 73.0
source_github/streams.py: 94.0
