diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 268c4c2cba692..56541065c0f4f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.25-alpha +current_version = 0.30.34-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? @@ -10,6 +10,12 @@ serialize = [bumpversion:file:.env] +[bumpversion:file:airbyte-server/Dockerfile] + +[bumpversion:file:airbyte-workers/Dockerfile] + +[bumpversion:file:airbyte-scheduler/app/Dockerfile] + [bumpversion:file:airbyte-webapp/package.json] [bumpversion:file:airbyte-webapp/package-lock.json] diff --git a/.env b/.env index afa76ab975b76..eb0f60456990c 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.25-alpha +VERSION=0.30.34-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 87f7d9dc3564f..a2505b2a5949b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -29,7 +29,6 @@ Expand the relevant checklist and delete the others. - [ ] `docs/integrations/README.md` - [ ] `airbyte-integrations/builds.md` - [ ] PR name follows [PR naming conventions](https://docs.airbyte.io/contributing-to-airbyte/updating-documentation#issues-and-pull-requests) -- [ ] Connector added to connector index like described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) #### Airbyter @@ -40,6 +39,8 @@ If this is a community PR, the Airbyte engineer reviewing this PR is responsible - [ ] Credentials added to Github CI. [Instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci). - [ ] [`/test connector=connectors/` command](https://docs.airbyte.io/connector-development#updating-an-existing-connector) is passing. - [ ] New Connector version released on Dockerhub by running the `/publish` command described [here](https://docs.airbyte.io/connector-development#updating-an-existing-connector) +- [ ] After the connector is published, connector added to connector index as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) +- [ ] Seed specs have been re-generated by building the platform and committing the changes to the seed spec files, as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector)

@@ -59,7 +60,6 @@ If this is a community PR, the Airbyte engineer reviewing this PR is responsible - [ ] Connector's `bootstrap.md`. See [description and examples](https://docs.google.com/document/d/1ypdgmwmEHWv-TrO4_YOQ7pAJGVrMp5BOkEVh831N260/edit?usp=sharing) - [ ] Changelog updated in `docs/integrations//.md` including changelog. See changelog [example](https://docs.airbyte.io/integrations/sources/stripe#changelog) - [ ] PR name follows [PR naming conventions](https://docs.airbyte.io/contributing-to-airbyte/updating-documentation#issues-and-pull-requests) -- [ ] Connector version bumped like described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) #### Airbyter @@ -70,6 +70,8 @@ If this is a community PR, the Airbyte engineer reviewing this PR is responsible - [ ] Credentials added to Github CI. [Instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci). - [ ] [`/test connector=connectors/` command](https://docs.airbyte.io/connector-development#updating-an-existing-connector) is passing. - [ ] New Connector version released on Dockerhub by running the `/publish` command described [here](https://docs.airbyte.io/connector-development#updating-an-existing-connector) +- [ ] After the new connector version is published, connector version bumped in the seed directory as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) +- [ ] Seed specs have been re-generated by building the platform and committing the changes to the seed spec files, as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector)

diff --git a/.github/workflows/doc-link-check.json b/.github/workflows/doc-link-check.json index 6777f5033b01c..a7683d73d66bd 100644 --- a/.github/workflows/doc-link-check.json +++ b/.github/workflows/doc-link-check.json @@ -45,11 +45,15 @@ "reason": "Test only scaffold connector", "pattern": "destinations/scaffold-" }, + { + "reason": "Returns a 403 for many valid pages", + "pattern": "https://mvnrepository.com/artifact/" + }, { "reason": "Archived articles aren't actively maintained.", "pattern": "archive/" } ], "retryOn429": false, - "aliveStatusCodes": [200, 206, 401, 403, 429, 503] + "aliveStatusCodes": [200, 206, 429, 503, 0] } diff --git a/.github/workflows/doc-link-check.yml b/.github/workflows/doc-link-check.yml index 9ccfc86d846bf..1a52e49cd9b07 100644 --- a/.github/workflows/doc-link-check.yml +++ b/.github/workflows/doc-link-check.yml @@ -1,5 +1,6 @@ # Perform link check on all markdown files -name: Doc Link Checker (Full) + +name: Doc Link Checker on: push: @@ -12,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@master + # check all files on master - uses: gaurav-nelson/github-action-markdown-link-check@v1 + if: github.ref == 'refs/heads/master' with: - use-quiet-mode: 'no' - use-verbose-mode: 'yes' + use-quiet-mode: 'yes' check-modified-files-only: 'no' config-file: .github/workflows/doc-link-check.json - base-branch: ${{ github.base_ref }} +# # check changed files for branches +# - uses: gaurav-nelson/github-action-markdown-link-check@v1 +# if: github.ref != 'refs/heads/master' +# with: +# use-quiet-mode: 'yes' +# check-modified-files-only: 'yes' +# config-file: .github/workflows/doc-link-check.json diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index f485acc781ace..87ff147195de7 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -459,6 +459,17 @@ jobs: - name: Build Platform Docker Images run: SUB_BUILD=PLATFORM ./gradlew composeBuild --scan + - name: Run Kubernetes End-to-End Acceptance Tests + env: + USER: root + HOME: /home/runner + AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }} + SECRET_STORE_GCP_CREDENTIALS: ${{ secrets.SECRET_STORE_GCP_CREDENTIALS }} + SECRET_STORE_GCP_PROJECT_ID: ${{ secrets.SECRET_STORE_GCP_PROJECT_ID }} + SECRET_STORE_FOR_CONFIGS: ${{ secrets.SECRET_STORE_FOR_CONFIGS }} + run: | + CI=true IS_MINIKUBE=true ./tools/bin/acceptance_test_kube.sh + - name: Run Logging Tests run: ./tools/bin/cloud_storage_logging_test.sh env: @@ -481,16 +492,6 @@ jobs: run: | CI=true ./tools/bin/gcp_acceptance_tests.sh - - name: Run Kubernetes End-to-End Acceptance Tests - env: - USER: root - HOME: /home/runner - AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }} - SECRET_STORE_GCP_CREDENTIALS: ${{ secrets.SECRET_STORE_GCP_CREDENTIALS }} - SECRET_STORE_GCP_PROJECT_ID: ${{ secrets.SECRET_STORE_GCP_PROJECT_ID }} - SECRET_STORE_FOR_CONFIGS: ${{ secrets.SECRET_STORE_FOR_CONFIGS }} - run: | - CI=true IS_MINIKUBE=true ./tools/bin/acceptance_test_kube.sh # In case of self-hosted EC2 errors, remove this block. 
stop-kube-acceptance-test-runner: name: Stop Kube Acceptance Test EC2 Runner diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 79f3f90f128bf..22684e6e1cef9 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -107,6 +107,7 @@ jobs: GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD: ${{ secrets.GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD }} GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }} GOOGLE_DIRECTORY_TEST_CREDS: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS }} + GOOGLE_DIRECTORY_TEST_CREDS_OAUTH: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS_OAUTH }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }} GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }} @@ -191,6 +192,7 @@ jobs: MONGODB_TEST_CREDS: ${{ secrets.MONGODB_TEST_CREDS }} SOURCE_ONESIGNAL_TEST_CREDS: ${{ secrets.SOURCE_ONESIGNAL_TEST_CREDS }} SOURCE_SALESLOFT_TEST_CREDS: ${{ secrets.SOURCE_SALESLOFT_TEST_CREDS }} + SOURCE_CONFLUENCE_TEST_CREDS: ${{ secrets.SOURCE_CONFLUENCE_TEST_CREDS }} SOURCE_AMAZON_SQS_TEST_CREDS: ${{ secrets.SOURCE_AMAZON_SQS_TEST_CREDS }} SOURCE_FRESHSERVICE_TEST_CREDS: ${{ secrets.SOURCE_FRESHSERVICE_TEST_CREDS }} SOURCE_LEMLIST_TEST_CREDS: ${{ secrets.SOURCE_LEMLIST_TEST_CREDS }} @@ -198,6 +200,9 @@ jobs: SOURCE_PAYSTACK_TEST_CREDS: ${{ secrets.SOURCE_PAYSTACK_TEST_CREDS }} SOURCE_DELIGHTED_TEST_CREDS: ${{ secrets.SOURCE_DELIGHTED_TEST_CREDS }} SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} + SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} + SOURCE_FRESHSALES_TEST_CREDS: ${{ secrets.SOURCE_FRESHSALES_TEST_CREDS }} + SOURCE_MONDAY_TEST_CREDS: ${{ secrets.SOURCE_MONDAY_TEST_CREDS }} - run: | echo "$SPEC_CACHE_SERVICE_ACCOUNT_KEY" > spec_cache_key_file.json && docker login -u airbytebot -p ${DOCKER_PASSWORD} ./tools/integrations/manage.sh publish airbyte-integrations/${{ github.event.inputs.connector }} ${{ github.event.inputs.run-tests }} --publish_spec_to_cache diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 4e984b646c11c..a25bd99cce39b 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -102,6 +102,7 @@ jobs: GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD: ${{ secrets.GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD }} GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }} GOOGLE_DIRECTORY_TEST_CREDS: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS }} + GOOGLE_DIRECTORY_TEST_CREDS_OAUTH: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS_OAUTH }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }} GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }} @@ -186,6 +187,7 @@ jobs: MONGODB_TEST_CREDS: ${{ secrets.MONGODB_TEST_CREDS }} SOURCE_ONESIGNAL_TEST_CREDS: ${{ secrets.SOURCE_ONESIGNAL_TEST_CREDS }} SOURCE_SALESLOFT_TEST_CREDS: ${{ secrets.SOURCE_SALESLOFT_TEST_CREDS }} + SOURCE_CONFLUENCE_TEST_CREDS: ${{ secrets.SOURCE_CONFLUENCE_TEST_CREDS }} SOURCE_AMAZON_SQS_TEST_CREDS: ${{ secrets.SOURCE_AMAZON_SQS_TEST_CREDS }} SOURCE_FRESHSERVICE_TEST_CREDS: ${{ secrets.SOURCE_FRESHSERVICE_TEST_CREDS }} SOURCE_LEMLIST_TEST_CREDS: ${{ secrets.SOURCE_LEMLIST_TEST_CREDS }} @@ -193,6 +195,9 @@ jobs: 
SOURCE_PAYSTACK_TEST_CREDS: ${{ secrets.SOURCE_PAYSTACK_TEST_CREDS }} SOURCE_DELIGHTED_TEST_CREDS: ${{ secrets.SOURCE_DELIGHTED_TEST_CREDS }} SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} + SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} + SOURCE_FRESHSALES_TEST_CREDS: ${{ secrets.SOURCE_FRESHSALES_TEST_CREDS }} + SOURCE_MONDAY_TEST_CREDS: ${{ secrets.SOURCE_MONDAY_TEST_CREDS }} - run: | ./tools/bin/ci_integration_test.sh ${{ github.event.inputs.connector }} name: test ${{ github.event.inputs.connector }} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 4f81299a37cfc..0000000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "java.configuration.updateBuildConfiguration": "automatic" -} diff --git a/README.md b/README.md index aa1b60a45f528..719944ec993e8 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,6 @@ Now visit [http://localhost:8000](http://localhost:8000) Here is a [step-by-step guide](https://github.com/airbytehq/airbyte/tree/e378d40236b6a34e1c1cb481c8952735ec687d88/docs/quickstart/getting-started.md) showing you how to load data from an API into a file, all on your computer. -If you want to schedule a 20-min call with our team to help you get set up, please select [some time directly here](https://calendly.com/nataliekwong/airbyte-onboarding). - ## Features * **Built for extensibility**: Adapt an existing connector to your needs or build a new one with ease. diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java b/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java index 6e87ce1f45172..0918ece248910 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java @@ -22,17 +22,17 @@ public class LineGobbler implements VoidCallable { private final static Logger LOGGER = LoggerFactory.getLogger(LineGobbler.class); public static void gobble(final InputStream is, final Consumer consumer) { - gobble(is, consumer, "generic", MdcScope.DEFAULT); + gobble(is, consumer, "generic", MdcScope.DEFAULT_BUILDER); } - public static void gobble(final InputStream is, final Consumer consumer, final MdcScope mdcScope) { - gobble(is, consumer, "generic", mdcScope); + public static void gobble(final InputStream is, final Consumer consumer, final MdcScope.Builder mdcScopeBuilder) { + gobble(is, consumer, "generic", mdcScopeBuilder); } - public static void gobble(final InputStream is, final Consumer consumer, final String caller, final MdcScope mdcScope) { + public static void gobble(final InputStream is, final Consumer consumer, final String caller, final MdcScope.Builder mdcScopeBuilder) { final ExecutorService executor = Executors.newSingleThreadExecutor(); final Map mdc = MDC.getCopyOfContextMap(); - final var gobbler = new LineGobbler(is, consumer, executor, mdc, caller, mdcScope); + final var gobbler = new LineGobbler(is, consumer, executor, mdc, caller, mdcScopeBuilder); executor.submit(gobbler); } @@ -41,21 +41,21 @@ public static void gobble(final InputStream is, final Consumer consumer, private final ExecutorService executor; private final Map mdc; private final String caller; - private final MdcScope containerLogMDC; + private final MdcScope.Builder containerLogMdcBuilder; LineGobbler(final InputStream is, final Consumer consumer, final ExecutorService executor, final Map mdc) { - this(is, consumer, executor, mdc, "generic", MdcScope.DEFAULT); + this(is, 
consumer, executor, mdc, "generic", MdcScope.DEFAULT_BUILDER); } LineGobbler(final InputStream is, final Consumer consumer, final ExecutorService executor, final Map mdc, - final MdcScope mdcScope) { - this(is, consumer, executor, mdc, "generic", mdcScope); + final MdcScope.Builder mdcScopeBuilder) { + this(is, consumer, executor, mdc, "generic", mdcScopeBuilder); } LineGobbler(final InputStream is, @@ -63,13 +63,13 @@ public static void gobble(final InputStream is, final Consumer consumer, final ExecutorService executor, final Map mdc, final String caller, - final MdcScope mdcScope) { + final MdcScope.Builder mdcScopeBuilder) { this.is = IOs.newBufferedReader(is); this.consumer = consumer; this.executor = executor; this.mdc = mdc; this.caller = caller; - this.containerLogMDC = mdcScope; + this.containerLogMdcBuilder = mdcScopeBuilder; } @Override @@ -78,7 +78,7 @@ public void voidCall() { try { String line; while ((line = is.readLine()) != null) { - try (containerLogMDC) { + try (final var mdcScope = containerLogMdcBuilder.build()) { consumer.accept(line); } } diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java b/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java index 190c1e9ebb198..b4c2f4c46f0e5 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java @@ -17,7 +17,7 @@ *
  *   
  *     try(final ScopedMDCChange scopedMDCChange = new ScopedMDCChange(
- *      new HashMap() {{
+ *      new HashMap<String, String>() {{
  *        put("my", "value");
  *      }}
  *     )) {
@@ -28,7 +28,7 @@
  */
 public class MdcScope implements AutoCloseable {
 
-  public final static MdcScope DEFAULT = new Builder().build();
+  public final static MdcScope.Builder DEFAULT_BUILDER = new Builder();
 
   private final Map originalContextMap;
 
diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java b/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java
index 6154cce7219f5..65a07187cd733 100644
--- a/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java
+++ b/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java
@@ -27,8 +27,8 @@ public static  AutoCloseableIterator fromIterator(final Iterator iterat
   }
 
   /**
-   * Coerces a vanilla {@link Iterator} into a {@link AutoCloseableIterator}. The provided
-   * {@param onClose} function will be called at most one time.
+   * Coerces a vanilla {@link Iterator} into a {@link AutoCloseableIterator}. The provided onClose
+   * function will be called at most one time.
    *
    * @param iterator autocloseable iterator to add another close to
    * @param onClose the function that will be called on close
diff --git a/airbyte-commons/src/main/resources/log4j2.xml b/airbyte-commons/src/main/resources/log4j2.xml
index c07bd80c4b319..f682d8017e876 100644
--- a/airbyte-commons/src/main/resources/log4j2.xml
+++ b/airbyte-commons/src/main/resources/log4j2.xml
@@ -7,7 +7,7 @@
         
         %replace{%X{log_source} - }{^ - }{}%d{yyyy-MM-dd HH:mm:ss}{GMT+0} %p (%X{job_root}) %C{1}(%M):%L - %replace{%m}{apikey=[\w\-]*}{apikey=*****}%n
         
-        %d{yyyy-MM-dd HH:mm:ss} %-5p %replace{%m}{apikey=[\w\-]*}{apikey=*****}%n
+        %replace{%X{log_source} - }{^ - }{}%d{yyyy-MM-dd HH:mm:ss}{GMT+0} %p %C{1}(%M):%L - %replace{%m}{apikey=[\w\-]*}{apikey=*****}%n
 
         
         $${env:LOG_LEVEL:-INFO}
diff --git a/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java b/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java
index 26f160c06a1f6..145e6565454e6 100644
--- a/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java
+++ b/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java
@@ -9,7 +9,6 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 
@@ -18,6 +17,7 @@
 import io.airbyte.commons.concurrency.VoidCallable;
 import java.util.Iterator;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Stream;
 import org.junit.jupiter.api.Test;
 
@@ -38,7 +38,10 @@ void testFromIterator() throws Exception {
 
   @Test
   void testFromStream() throws Exception {
-    final Stream stream = spy(Stream.of("a", "b", "c"));
+    final AtomicBoolean isClosed = new AtomicBoolean(false);
+    final Stream stream = Stream.of("a", "b", "c");
+    stream.onClose(() -> isClosed.set(true));
+
     final AutoCloseableIterator iterator = AutoCloseableIterators.fromStream(stream);
 
     assertNext(iterator, "a");
@@ -46,7 +49,7 @@ void testFromStream() throws Exception {
     assertNext(iterator, "c");
     iterator.close();
 
-    verify(stream).close();
+    assertTrue(isClosed.get());
   }
 
   private void assertNext(final Iterator iterator, final String value) {
diff --git a/airbyte-config/init/Dockerfile b/airbyte-config/init/Dockerfile
index afba972dedf19..89bd5491e53a6 100644
--- a/airbyte-config/init/Dockerfile
+++ b/airbyte-config/init/Dockerfile
@@ -1,4 +1,5 @@
-FROM alpine:3.4 AS seed
+ARG ALPINE_IMAGE=alpine:3.4
+FROM ${ALPINE_IMAGE} AS seed
 
 WORKDIR /app
 
diff --git a/airbyte-config/init/build.gradle b/airbyte-config/init/build.gradle
index 945051223ea9f..c7117fd16ea78 100644
--- a/airbyte-config/init/build.gradle
+++ b/airbyte-config/init/build.gradle
@@ -6,4 +6,8 @@ dependencies {
     implementation 'commons-cli:commons-cli:1.4'
 
     implementation project(':airbyte-config:models')
+    implementation project(':airbyte-config:persistence')
+    implementation project(':airbyte-protocol:models')
+    implementation project(':airbyte-commons-docker')
+    implementation project(':airbyte-json-validation')
 }
diff --git a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java
index 47c4c419bcf10..3730369621090 100644
--- a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java
+++ b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java
@@ -7,7 +7,9 @@
 public enum SeedType {
 
   STANDARD_SOURCE_DEFINITION("/seed/source_definitions.yaml", "sourceDefinitionId"),
-  STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId");
+  STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"),
+  SOURCE_SPEC("/seed/source_specs.yaml", "dockerImage"),
+  DESTINATION_SPEC("/seed/destination_specs.yaml", "dockerImage");
 
   final String resourcePath;
   // ID field name
diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java b/airbyte-config/init/src/main/java/io/airbyte/config/init/YamlSeedConfigPersistence.java
similarity index 63%
rename from airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java
rename to airbyte-config/init/src/main/java/io/airbyte/config/init/YamlSeedConfigPersistence.java
index 3bca71d57c2db..d218cfca89d4a 100644
--- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java
+++ b/airbyte-config/init/src/main/java/io/airbyte/config/init/YamlSeedConfigPersistence.java
@@ -2,23 +2,27 @@
  * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
  */
 
-package io.airbyte.config.persistence;
+package io.airbyte.config.init;
 
 import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.io.Resources;
+import io.airbyte.commons.docker.DockerUtils;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.util.MoreIterators;
 import io.airbyte.commons.yaml.Yamls;
 import io.airbyte.config.AirbyteConfig;
 import io.airbyte.config.ConfigSchema;
-import io.airbyte.config.init.SeedType;
+import io.airbyte.config.persistence.ConfigNotFoundException;
+import io.airbyte.config.persistence.ConfigPersistence;
 import io.airbyte.validation.json.JsonValidationException;
 import java.io.IOException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -45,11 +49,40 @@ public static YamlSeedConfigPersistence get(final Class seedDefinitionsResour
     return new YamlSeedConfigPersistence(seedDefinitionsResourceClass);
   }
 
-  private YamlSeedConfigPersistence(final Class seedDefinitionsResourceClass) throws IOException {
+  private YamlSeedConfigPersistence(final Class seedResourceClass) throws IOException {
+    final Map sourceDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_SOURCE_DEFINITION);
+    final Map sourceSpecConfigs = getConfigs(seedResourceClass, SeedType.SOURCE_SPEC);
+    final Map fullSourceDefinitionConfigs = sourceDefinitionConfigs.entrySet().stream()
+        .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), sourceSpecConfigs)));
+
+    final Map destinationDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION);
+    final Map destinationSpecConfigs = getConfigs(seedResourceClass, SeedType.DESTINATION_SPEC);
+    final Map fullDestinationDefinitionConfigs = destinationDefinitionConfigs.entrySet().stream()
+        .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), destinationSpecConfigs)));
+
     this.allSeedConfigs = ImmutableMap.>builder()
-        .put(SeedType.STANDARD_SOURCE_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_SOURCE_DEFINITION))
-        .put(SeedType.STANDARD_DESTINATION_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION))
-        .build();
+        .put(SeedType.STANDARD_SOURCE_DEFINITION, fullSourceDefinitionConfigs)
+        .put(SeedType.STANDARD_DESTINATION_DEFINITION, fullDestinationDefinitionConfigs).build();
+  }
+
+  /**
+   * Merges the corresponding spec JSON into the definition JSON. This is necessary because specs are
+   * stored in a separate resource file from definitions.
+   *
+   * @param definitionJson JSON of connector definition that is missing a spec
+   * @param specConfigs map of docker image to JSON of docker image/connector spec pair
+   * @return JSON of connector definition including the connector spec
+   */
+  private JsonNode mergeSpecIntoDefinition(final JsonNode definitionJson, final Map specConfigs) {
+    final String dockerImage = DockerUtils.getTaggedImageName(
+        definitionJson.get("dockerRepository").asText(),
+        definitionJson.get("dockerImageTag").asText());
+    final JsonNode specConfigJson = specConfigs.get(dockerImage);
+    if (specConfigJson == null || specConfigJson.get("spec") == null) {
+      throw new UnsupportedOperationException(String.format("There is no seed spec for docker image %s", dockerImage));
+    }
+    ((ObjectNode) definitionJson).set("spec", specConfigJson.get("spec"));
+    return definitionJson;
   }
 
   @SuppressWarnings("UnstableApiUsage")
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json
index ea1fd8ef6ef9b..2a948beb7b301 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json
@@ -2,6 +2,6 @@
   "destinationDefinitionId": "079d5540-f236-4294-ba7c-ade8fd918496",
   "name": "BigQuery (denormalized typed struct)",
   "dockerRepository": "airbyte/destination-bigquery-denormalized",
-  "dockerImageTag": "0.1.6",
+  "dockerImageTag": "0.1.8",
   "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json
new file mode 100644
index 0000000000000..d82499161c7b8
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json
@@ -0,0 +1,7 @@
+{
+  "destinationDefinitionId": "2340cbba-358e-11ec-8d3d-0242ac130203",
+  "name": "Pulsar",
+  "dockerRepository": "airbyte/destination-pulsar",
+  "dockerImageTag": "0.1.0",
+  "documentationUrl": "https://docs.airbyte.io/integrations/destinations/pulsar"
+}
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json
index 42a5b5150bf8d..9feab0c077547 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json
@@ -2,6 +2,6 @@
   "destinationDefinitionId": "4816b78f-1489-44c1-9060-4b19d5fa9362",
   "name": "S3",
   "dockerRepository": "airbyte/destination-s3",
-  "dockerImageTag": "0.1.12",
+  "dockerImageTag": "0.1.13",
   "documentationUrl": "https://docs.airbyte.io/integrations/destinations/s3"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json
new file mode 100644
index 0000000000000..52e0f38dddfb7
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json
@@ -0,0 +1,7 @@
+{
+  "destinationDefinitionId": "707456df-6f4f-4ced-b5c6-03f73bcad1c5",
+  "name": "Cassandra",
+  "dockerRepository": "airbyte/destination-cassandra",
+  "dockerImageTag": "0.1.0",
+  "documentationUrl": "https://docs.airbyte.io/integrations/destinations/cassandra"
+}
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json
index 89078e4019d88..b7e6e4fff2655 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json
@@ -2,6 +2,6 @@
   "destinationDefinitionId": "ca8f6566-e555-4b40-943a-545bf123117a",
   "name": "Google Cloud Storage (GCS)",
   "dockerRepository": "airbyte/destination-gcs",
-  "dockerImageTag": "0.1.2",
+  "dockerImageTag": "0.1.3",
   "documentationUrl": "https://docs.airbyte.io/integrations/destinations/gcs"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json
index 701207e5790ff..58ceb7512f0c9 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "010eb12f-837b-4685-892d-0a39f76a98f5",
   "name": "Facebook Pages",
   "dockerRepository": "airbyte/source-facebook-pages",
-  "dockerImageTag": "0.1.2",
+  "dockerImageTag": "0.1.3",
   "documentationUrl": "https://hub.docker.com/r/airbyte/source-facebook-pages",
   "icon": "facebook.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json
index cd73c479261a5..69ac7cf32c70a 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "12928b32-bf0a-4f1e-964f-07e12e37153a",
   "name": "Mixpanel",
   "dockerRepository": "airbyte/source-mixpanel",
-  "dockerImageTag": "0.1.1",
+  "dockerImageTag": "0.1.3",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/mixpanel",
   "icon": "mixpanel.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json
index 46fde36888a3b..34f0a63393b41 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json
@@ -2,6 +2,6 @@
   "sourceDefinitionId": "1d4fdb25-64fc-4569-92da-fcdca79a8372",
   "name": "Okta",
   "dockerRepository": "airbyte/source-okta",
-  "dockerImageTag": "0.1.2",
+  "dockerImageTag": "0.1.4",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/okta"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json
index 00fe3f7d0fba4..6cb4f103e6d2f 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json
@@ -2,6 +2,6 @@
   "sourceDefinitionId": "2e875208-0c0b-4ee4-9e92-1cb3156ea799",
   "name": "Iterable",
   "dockerRepository": "airbyte/source-iterable",
-  "dockerImageTag": "0.1.9",
+  "dockerImageTag": "0.1.10",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/iterable"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json
index 67f13cd7fff56..3af6a81ec4d29 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c",
   "name": "Hubspot",
   "dockerRepository": "airbyte/source-hubspot",
-  "dockerImageTag": "0.1.21",
+  "dockerImageTag": "0.1.23",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot",
   "icon": "hubspot.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json
index 92988179fb2e4..eb89c73307bf8 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json
@@ -2,6 +2,6 @@
   "sourceDefinitionId": "45d2e135-2ede-49e1-939f-3e3ec357a65e",
   "name": "Recharge",
   "dockerRepository": "airbyte/source-recharge",
-  "dockerImageTag": "0.1.3",
+  "dockerImageTag": "0.1.4",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/recharge"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json
index d5d140972c959..7999f8cfe9eed 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "59f1e50a-331f-4f09-b3e8-2e8d4d355f44",
   "name": "Greenhouse",
   "dockerRepository": "airbyte/source-greenhouse",
-  "dockerImageTag": "0.2.5",
+  "dockerImageTag": "0.2.6",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/greenhouse",
   "icon": "greenhouse.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json
index 6a3aecdaa2153..04bc61ae14ef1 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "79c1aa37-dae3-42ae-b333-d1c105477715",
   "name": "Zendesk Support",
   "dockerRepository": "airbyte/source-zendesk-support",
-  "dockerImageTag": "0.1.3",
+  "dockerImageTag": "0.1.4",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/zendesk-support",
   "icon": "zendesk.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json
new file mode 100644
index 0000000000000..6f40313e0fb65
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json
@@ -0,0 +1,7 @@
+{
+  "sourceDefinitionId": "80a54ea2-9959-4040-aac1-eee42423ec9b",
+  "name": "Monday",
+  "dockerRepository": "airbyte/source-zendesk-monday",
+  "dockerImageTag": "0.1.0",
+  "documentationUrl": "https://docs.airbyte.io/integrations/sources/monday"
+}
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json
index 90ce4d4c9d5e9..aeddeffbaf6e4 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json
@@ -2,6 +2,6 @@
   "sourceDefinitionId": "9da77001-af33-4bcd-be46-6252bf9342b9",
   "name": "Shopify",
   "dockerRepository": "airbyte/source-shopify",
-  "dockerImageTag": "0.1.21",
+  "dockerImageTag": "0.1.22",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/shopify"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json
index 9cf14666e7b14..b0f7d72a5c777 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "b117307c-14b6-41aa-9422-947e34922962",
   "name": "Salesforce",
   "dockerRepository": "airbyte/source-salesforce",
-  "dockerImageTag": "0.1.2",
+  "dockerImageTag": "0.1.3",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/salesforce",
   "icon": "salesforce.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json
index e65a9e63751d9..ff762f8c9f705 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "b5ea17b1-f170-46dc-bc31-cc744ca984c1",
   "name": "Microsoft SQL Server (MSSQL)",
   "dockerRepository": "airbyte/source-mssql",
-  "dockerImageTag": "0.3.6",
+  "dockerImageTag": "0.3.8",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/mssql",
   "icon": "mssql.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json
new file mode 100644
index 0000000000000..f88e4eedda821
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json
@@ -0,0 +1,7 @@
+{
+  "sourceDefinitionId": "cdaf146a-9b75-49fd-9dd2-9d64a0bb4781",
+  "name": "Sentry",
+  "dockerRepository": "airbyte/source-sentry",
+  "dockerImageTag": "0.1.0",
+  "documentationUrl": "https://docs.airbyte.io/integrations/sources/sentry"
+}
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json
index 7e1649ea63744..5abea4626c530 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json
@@ -2,6 +2,6 @@
   "sourceDefinitionId": "d19ae824-e289-4b14-995a-0632eb46d246",
   "name": "Google Directory",
   "dockerRepository": "airbyte/source-google-directory",
-  "dockerImageTag": "0.1.5",
+  "dockerImageTag": "0.1.8",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-directory"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json
index 1a952019a996f..2fd2cdf5d5d98 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "decd338e-5647-4c0b-adf4-da0e75f5a750",
   "name": "Postgres",
   "dockerRepository": "airbyte/source-postgres",
-  "dockerImageTag": "0.3.9",
+  "dockerImageTag": "0.3.13",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/postgres",
   "icon": "postgresql.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json
index 712f4c4558c82..327a7d21a5e48 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "e094cb9a-26de-4645-8761-65c0c425d1de",
   "name": "Stripe",
   "dockerRepository": "airbyte/source-stripe",
-  "dockerImageTag": "0.1.21",
+  "dockerImageTag": "0.1.22",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/stripe",
   "icon": "stripe.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json
index da5565d41b5f2..798cb04b32b5a 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c",
   "name": "Facebook Marketing",
   "dockerRepository": "airbyte/source-facebook-marketing",
-  "dockerImageTag": "0.2.21",
+  "dockerImageTag": "0.2.22",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/facebook-marketing",
   "icon": "facebook.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json
new file mode 100644
index 0000000000000..d74256e9d2816
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json
@@ -0,0 +1,7 @@
+{
+  "sourceDefinitionId": "eca08d79-7b92-4065-b7f3-79c14836ebe7",
+  "name": "Freshsales",
+  "dockerRepository": "airbyte/source-freshsales",
+  "dockerImageTag": "0.1.0",
+  "documentationUrl": "https://docs.airbyte.io/integrations/sources/freshsales"
+}
diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
index 1d899ea73b42f..8e9644a95b98b 100644
--- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
@@ -11,8 +11,13 @@
 - name: BigQuery (denormalized typed struct)
   destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496
   dockerRepository: airbyte/destination-bigquery-denormalized
-  dockerImageTag: 0.1.7
+  dockerImageTag: 0.1.8
   documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery
+- name: Cassandra
+  destinationDefinitionId: 707456df-6f4f-4ced-b5c6-03f73bcad1c5
+  dockerRepository: airbyte/destination-cassandra
+  dockerImageTag: 0.1.0
+  documentationUrl: https://docs.airbyte.io/integrations/destinations/cassandra
 - name: Chargify (Keen)
   destinationDefinitionId: 81740ce8-d764-4ea7-94df-16bb41de36ae
   dockerRepository: airbyte/destination-keen
@@ -31,7 +36,7 @@
 - name: Google Cloud Storage (GCS)
   destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a
   dockerRepository: airbyte/destination-gcs
-  dockerImageTag: 0.1.2
+  dockerImageTag: 0.1.3
   documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs
 - name: Google PubSub
   destinationDefinitionId: 356668e2-7e34-47f3-a3b0-67a8a481b692
@@ -84,6 +89,11 @@
   dockerImageTag: 0.3.11
   documentationUrl: https://docs.airbyte.io/integrations/destinations/postgres
   icon: postgresql.svg
+- name: Pulsar
+  destinationDefinitionId: 2340cbba-358e-11ec-8d3d-0242ac130203
+  dockerRepository: airbyte/destination-pulsar
+  dockerImageTag: 0.1.0
+  documentationUrl: https://docs.airbyte.io/integrations/destinations/pulsar
 - name: Redshift
   destinationDefinitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc
   dockerRepository: airbyte/destination-redshift
@@ -93,7 +103,7 @@
 - name: S3
   destinationDefinitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362
   dockerRepository: airbyte/destination-s3
-  dockerImageTag: 0.1.12
+  dockerImageTag: 0.1.13
   documentationUrl: https://docs.airbyte.io/integrations/destinations/s3
 - name: Snowflake
   destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba
diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
new file mode 100644
index 0000000000000..6142ea259b445
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -0,0 +1,3051 @@
+# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator.
+# Do NOT edit this file directly. See generator class for more details.
+---
+- dockerImage: "airbyte/destination-azure-blob-storage:0.1.0"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "AzureBlobStorage Destination Spec"
+      type: "object"
+      required:
+      - "azure_blob_storage_account_name"
+      - "azure_blob_storage_account_key"
+      - "format"
+      additionalProperties: false
+      properties:
+        azure_blob_storage_endpoint_domain_name:
+          title: "Endpoint Domain Name"
+          type: "string"
+          default: "blob.core.windows.net"
+          description: "This is Azure Blob Storage endpoint domain name. Leave default\
+            \ value (or leave it empty if run container from command line) to use\
+            \ Microsoft native from example."
+          examples:
+          - "blob.core.windows.net"
+        azure_blob_storage_container_name:
+          title: "Azure blob storage container (Bucket) Name"
+          type: "string"
+          description: "The name of the Azure blob storage container. If not exists\
+            \ - will be created automatically. May be empty, then will be created\
+            \ automatically airbytecontainer+timestamp"
+          examples:
+          - "airbytetescontainername"
+        azure_blob_storage_account_name:
+          title: "Azure Blob Storage account name"
+          type: "string"
+          description: "The account's name of the Azure Blob Storage."
+          examples:
+          - "airbyte5storage"
+        azure_blob_storage_account_key:
+          description: "The Azure blob storage account key."
+          airbyte_secret: true
+          type: "string"
+          examples:
+          - "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd=="
+        format:
+          title: "Output Format"
+          type: "object"
+          description: "Output data format"
+          oneOf:
+          - title: "CSV: Comma-Separated Values"
+            required:
+            - "format_type"
+            - "flattening"
+            properties:
+              format_type:
+                type: "string"
+                const: "CSV"
+              flattening:
+                type: "string"
+                title: "Normalization (Flattening)"
+                description: "Whether the input json data should be normalized (flattened)\
+                  \ in the output CSV. Please refer to docs for details."
+                default: "No flattening"
+                enum:
+                - "No flattening"
+                - "Root level flattening"
+          - title: "JSON Lines: newline-delimited JSON"
+            required:
+            - "format_type"
+            properties:
+              format_type:
+                type: "string"
+                const: "JSONL"
+    supportsIncremental: true
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes:
+    - "overwrite"
+    - "append"
+- dockerImage: "airbyte/destination-bigquery:0.5.0"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "BigQuery Destination Spec"
+      type: "object"
+      required:
+      - "project_id"
+      - "dataset_id"
+      additionalProperties: true
+      properties:
+        big_query_client_buffer_size_mb:
+          title: "Google BigQuery client chunk size"
+          description: "Google BigQuery client's chunk(buffer) size (MIN=1, MAX =\
+            \ 15) for each table. The default 15MiB value is used if not set explicitly.\
+            \ It's recommended to decrease value for big data sets migration for less\
+            \ HEAP memory consumption and avoiding crashes. For more details refer\
+            \ to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html"
+          type: "integer"
+          minimum: 1
+          maximum: 15
+          default: 15
+          examples:
+          - "15"
+        project_id:
+          type: "string"
+          description: "The GCP project ID for the project containing the target BigQuery\
+            \ dataset."
+          title: "Project ID"
+        dataset_id:
+          type: "string"
+          description: "Default BigQuery Dataset ID tables are replicated to if the\
+            \ source does not specify a namespace."
+          title: "Default Dataset ID"
+        dataset_location:
+          type: "string"
+          description: "The location of the dataset. Warning: Changes made after creation\
+            \ will not be applied."
+          title: "Dataset Location"
+          default: "US"
+          enum:
+          - "US"
+          - "EU"
+          - "asia-east1"
+          - "asia-east2"
+          - "asia-northeast1"
+          - "asia-northeast2"
+          - "asia-northeast3"
+          - "asia-south1"
+          - "asia-southeast1"
+          - "asia-southeast2"
+          - "australia-southeast1"
+          - "europe-central1"
+          - "europe-central2"
+          - "europe-north1"
+          - "europe-west1"
+          - "europe-west2"
+          - "europe-west3"
+          - "europe-west4"
+          - "europe-west5"
+          - "europe-west6"
+          - "northamerica-northeast1"
+          - "southamerica-east1"
+          - "us-central1"
+          - "us-east1"
+          - "us-east4"
+          - "us-west-1"
+          - "us-west-2"
+          - "us-west-3"
+          - "us-west-4"
+        credentials_json:
+          type: "string"
+          description: "The contents of the JSON service account key. Check out the\
+            \ docs if you need help generating this key. Default credentials will\
+            \ be used if this field is left empty."
+          title: "Credentials JSON"
+          airbyte_secret: true
+        transformation_priority:
+          type: "string"
+          description: "When running custom transformations or Basic normalization,\
+            \ running queries on interactive mode can hit BQ limits, choosing batch\
+            \ will solve those limitss."
+          title: "Transformation Query Run Type"
+          default: "interactive"
+          enum:
+          - "interactive"
+          - "batch"
+        loading_method:
+          type: "object"
+          title: "Loading Method"
+          description: "Loading method used to send select the way data will be uploaded\
+            \ to BigQuery."
+          oneOf:
+          - title: "Standard Inserts"
+            additionalProperties: false
+            description: "Direct uploading using streams."
+            required:
+            - "method"
+            properties:
+              method:
+                type: "string"
+                const: "Standard"
+          - title: "GCS Staging"
+            additionalProperties: false
+            description: "Writes large batches of records to a file, uploads the file\
+              \ to GCS, then uses 
COPY INTO table
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "gcs_bucket_name" + - "gcs_bucket_path" + - "credential" + properties: + method: + type: "string" + const: "GCS Staging" + gcs_bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the GCS bucket." + examples: + - "airbyte_sync" + gcs_bucket_path: + description: "Directory under the GCS bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + keep_files_in_gcs-bucket: + type: "string" + description: "This upload method is supposed to temporary store records\ + \ in GCS bucket. What do you want to do with data in GCS bucket\ + \ when migration has finished?" + title: "GCS tmp files afterward processing" + default: "Delete all tmp files from GCS" + enum: + - "Delete all tmp files from GCS" + - "Keep all tmp files in GCS" + credential: + title: "Credential" + type: "object" + oneOf: + - title: "HMAC key" + required: + - "credential_type" + - "hmac_key_access_id" + - "hmac_key_secret" + properties: + credential_type: + type: "string" + const: "HMAC_KEY" + hmac_key_access_id: + type: "string" + description: "HMAC key access ID. When linked to a service account,\ + \ this ID is 61 characters long; when linked to a user account,\ + \ it is 24 characters long." + title: "HMAC Key Access ID" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234" + hmac_key_secret: + type: "string" + description: "The corresponding secret for the access ID. It\ + \ is a 40-character base-64 encoded string." + title: "HMAC Key Secret" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234567890ABCDEFGHIJ" + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.1.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Denormalized Typed Struct Destination Spec" + type: "object" + required: + - "project_id" + - "dataset_id" + additionalProperties: true + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "Default BigQuery Dataset ID tables are replicated to if the\ + \ source does not specify a namespace." + title: "Default Dataset ID" + dataset_location: + type: "string" + description: "The location of the dataset. Warning: Changes made after creation\ + \ will not be applied." + title: "Dataset Location" + default: "US" + enum: + - "US" + - "EU" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-southeast1" + - "asia-southeast2" + - "australia-southeast1" + - "europe-central1" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west5" + - "europe-west6" + - "northamerica-northeast1" + - "southamerica-east1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west-1" + - "us-west-2" + - "us-west-3" + - "us-west-4" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key. 
Default credentials will\ + \ be used if this field is left empty." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-cassandra:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/cassandra" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cassandra Destination Spec" + type: "object" + required: + - "keyspace" + - "username" + - "password" + - "address" + - "port" + additionalProperties: true + properties: + keyspace: + title: "Keyspace" + description: "Default Cassandra keyspace to create data in." + type: "string" + order: 0 + username: + title: "Username" + description: "Username to use to access Cassandra." + type: "string" + order: 1 + password: + title: "Password" + description: "Password associated with Cassandra." + type: "string" + airbyte_secret: true + order: 2 + address: + title: "Address" + description: "Address to connect to." + type: "string" + examples: + - "localhost,127.0.0.1" + order: 3 + port: + title: "Port" + description: "Port of Cassandra." + type: "integer" + minimum: 0 + maximum: 65536 + default: 9042 + order: 4 + datacenter: + title: "Datacenter" + description: "Datacenter of the cassandra cluster." + type: "string" + default: "datacenter1" + order: 5 + replication: + title: "Replication factor" + type: "integer" + description: "Indicates to how many nodes the data should be replicated\ + \ to." + default: 1 + order: 6 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-keen:0.2.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/keen" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Keen Spec" + type: "object" + required: + - "project_id" + - "api_key" + additionalProperties: false + properties: + project_id: + description: "Keen Project ID" + type: "string" + examples: + - "58b4acc22ba938934e888322e" + api_key: + title: "API Key" + description: "Keen Master API key" + type: "string" + examples: + - "ABCDEFGHIJKLMNOPRSTUWXYZ" + airbyte_secret: true + infer_timestamp: + title: "Infer Timestamp" + description: "Allow connector to guess keen.timestamp value based on the\ + \ streamed data" + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-dynamodb:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "DynamoDB Destination Spec" + type: "object" + required: + - "dynamodb_table_name" + - "dynamodb_region" + - "access_key_id" + - "secret_access_key" + additionalProperties: false + properties: + dynamodb_endpoint: + title: "Endpoint" + type: "string" + default: "" + description: "This is your DynamoDB endpoint url.(if you are working with\ + \ AWS DynamoDB, just leave empty)." + examples: + - "http://localhost:9000" + dynamodb_table_name: + title: "DynamoDB Table Name" + type: "string" + description: "The name of the DynamoDB table." 
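# Illustrative example only (not part of the generated seed): a config object that
# would satisfy the DynamoDB Destination Spec above. All values are fabricated and
# mirror the examples given in the spec fields.
#   dynamodb_table_name: "airbyte_sync"
#   dynamodb_region: "us-east-1"
#   dynamodb_endpoint: ""            # leave empty when using AWS-hosted DynamoDB
#   access_key_id: "A012345678910EXAMPLE"
#   secret_access_key: "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"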
+ examples: + - "airbyte_sync" + dynamodb_region: + title: "DynamoDB Region" + type: "string" + default: "" + description: "The region of the DynamoDB." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + - "us-gov-east-1" + - "us-gov-west-1" + access_key_id: + type: "string" + description: "The access key id to access the DynamoDB. Airbyte requires\ + \ Read and Write permissions to the DynamoDB." + title: "DynamoDB Key Id" + airbyte_secret: true + examples: + - "A012345678910EXAMPLE" + secret_access_key: + type: "string" + description: "The corresponding secret to the access key id." + title: "DynamoDB Access Key" + airbyte_secret: true + examples: + - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-elasticsearch:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/elasticsearch" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Elasticsearch Connection Configuration" + type: "object" + required: + - "endpoint" + additionalProperties: false + properties: + endpoint: + title: "Server Endpoint" + type: "string" + description: "The full url of the Elasticsearch server" + upsert: + type: "boolean" + title: "Upsert Records" + description: "If a primary key identifier is defined in the source, an upsert\ + \ will be performed using the primary key value as the elasticsearch doc\ + \ id. Does not support composite primary keys." + default: true + authenticationMethod: + title: "Authentication Method" + type: "object" + description: "The type of authentication to be used" + oneOf: + - title: "None" + additionalProperties: false + description: "No authentication will be used" + required: + - "method" + properties: + method: + type: "string" + const: "none" + - title: "Api Key/Secret" + additionalProperties: false + description: "Use a api key and secret combination to authenticate" + required: + - "method" + - "apiKeyId" + - "apiKeySecret" + properties: + method: + type: "string" + const: "secret" + apiKeyId: + title: "API Key ID" + description: "The Key ID to used when accessing an enterprise Elasticsearch\ + \ instance." + type: "string" + apiKeySecret: + title: "API Key Secret" + description: "The secret associated with the API Key ID." 
+ type: "string" + airbyte_secret: true + - title: "Username/Password" + additionalProperties: false + description: "Basic auth header with a username and password" + required: + - "method" + - "username" + - "password" + properties: + method: + type: "string" + const: "basic" + username: + title: "Username" + description: "Basic auth username to access a secure Elasticsearch\ + \ server" + type: "string" + password: + title: "Password" + description: "Basic auth password to access a secure Elasticsearch\ + \ server" + type: "string" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + supportsNamespaces: true +- dockerImage: "airbyte/destination-gcs:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "GCS Destination Spec" + type: "object" + required: + - "gcs_bucket_name" + - "gcs_bucket_path" + - "gcs_bucket_region" + - "credential" + - "format" + additionalProperties: false + properties: + gcs_bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the GCS bucket." + examples: + - "airbyte_sync" + gcs_bucket_path: + description: "Directory under the GCS bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + gcs_bucket_region: + title: "GCS Bucket Region" + type: "string" + default: "" + description: "The region of the GCS bucket." + enum: + - "" + - "-- North America --" + - "northamerica-northeast1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west1" + - "us-west2" + - "us-west3" + - "us-west4" + - "-- South America --" + - "southamerica-east1" + - "-- Europe --" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west6" + - "-- Asia --" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-south2" + - "asia-southeast1" + - "asia-southeast2" + - "-- Australia --" + - "australia-southeast1" + - "australia-southeast2" + - "-- Multi-regions --" + - "asia" + - "eu" + - "us" + - "-- Dual-regions --" + - "asia1" + - "eur4" + - "nam4" + credential: + title: "Credential" + type: "object" + oneOf: + - title: "HMAC key" + required: + - "credential_type" + - "hmac_key_access_id" + - "hmac_key_secret" + properties: + credential_type: + type: "string" + enum: + - "HMAC_KEY" + default: "HMAC_KEY" + hmac_key_access_id: + type: "string" + description: "HMAC key access ID. When linked to a service account,\ + \ this ID is 61 characters long; when linked to a user account,\ + \ it is 24 characters long." + title: "HMAC Key Access ID" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234" + hmac_key_secret: + type: "string" + description: "The corresponding secret for the access ID. It is a\ + \ 40-character base-64 encoded string." + title: "HMAC Key Secret" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234567890ABCDEFGHIJ" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "Avro: Apache Avro" + required: + - "format_type" + - "compression_codec" + properties: + format_type: + type: "string" + enum: + - "Avro" + default: "Avro" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data. 
Default\ + \ to no compression." + type: "object" + oneOf: + - title: "no compression" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "no compression" + default: "no compression" + - title: "Deflate" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "Deflate" + default: "Deflate" + compression_level: + title: "Deflate level" + description: "0: no compression & fastest, 9: best compression\ + \ & slowest." + type: "integer" + default: 0 + minimum: 0 + maximum: 9 + - title: "bzip2" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "bzip2" + default: "bzip2" + - title: "xz" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "xz" + default: "xz" + compression_level: + title: "Compression level" + description: "See here for details." + type: "integer" + default: 6 + minimum: 0 + maximum: 9 + - title: "zstandard" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "zstandard" + default: "zstandard" + compression_level: + title: "Compression level" + description: "Negative levels are 'fast' modes akin to lz4 or\ + \ snappy, levels above 9 are generally for archival purposes,\ + \ and levels above 18 use a lot of memory." + type: "integer" + default: 3 + minimum: -5 + maximum: 22 + include_checksum: + title: "Include checksum" + description: "If true, include a checksum with each data block." + type: "boolean" + default: false + - title: "snappy" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "snappy" + default: "snappy" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + enum: + - "CSV" + default: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." + default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "JSONL" + default: "JSONL" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." 
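# Illustrative example only (not part of the generated seed): one way the "format"
# object of the GCS Destination Spec above could be filled in, choosing Avro output
# with the zstandard codec. Values are fabricated.
#   format:
#     format_type: "Avro"
#     compression_codec:
#       codec: "zstandard"
#       compression_level: 3
#       include_checksum: false
#     part_size_mb: 5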
+ type: "integer" + default: 5 + examples: + - 5 + - title: "Parquet: Columnar Storage" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "Parquet" + default: "Parquet" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data pages." + type: "string" + enum: + - "UNCOMPRESSED" + - "SNAPPY" + - "GZIP" + - "LZO" + - "BROTLI" + - "LZ4" + - "ZSTD" + default: "UNCOMPRESSED" + block_size_mb: + title: "Block Size (Row Group Size) (MB)" + description: "This is the size of a row group being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will improve\ + \ the IO when reading, but consume more memory when writing. Default:\ + \ 128 MB." + type: "integer" + default: 128 + examples: + - 128 + max_padding_size_mb: + title: "Max Padding Size (MB)" + description: "Maximum size allowed as padding to align row groups.\ + \ This is also the minimum size of a row group. Default: 8 MB." + type: "integer" + default: 8 + examples: + - 8 + page_size_kb: + title: "Page Size (KB)" + description: "The page size is for compression. A block is composed\ + \ of pages. A page is the smallest unit that must be read fully\ + \ to access a single record. If this value is too small, the compression\ + \ will deteriorate. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_page_size_kb: + title: "Dictionary Page Size (KB)" + description: "There is one dictionary page per column per row group\ + \ when dictionary encoding is used. The dictionary page size works\ + \ like the page size but for dictionary. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_encoding: + title: "Dictionary Encoding" + description: "Default: true." + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + $schema: "http://json-schema.org/draft-07/schema#" +- dockerImage: "airbyte/destination-pubsub:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/pubsub" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google PubSub Destination Spec" + type: "object" + required: + - "project_id" + - "topic_id" + - "credentials_json" + additionalProperties: true + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target PubSub" + title: "Project ID" + topic_id: + type: "string" + description: "PubSub topic ID in the given GCP project ID" + title: "PubSub Topic ID" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key." 
+ title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-kafka:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/kafka" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Kafka Destination Spec" + type: "object" + required: + - "bootstrap_servers" + - "topic_pattern" + - "protocol" + - "acks" + - "enable_idempotence" + - "compression_type" + - "batch_size" + - "linger_ms" + - "max_in_flight_requests_per_connection" + - "client_dns_lookup" + - "buffer_memory" + - "max_request_size" + - "retries" + - "socket_connection_setup_timeout_ms" + - "socket_connection_setup_timeout_max_ms" + - "max_block_ms" + - "request_timeout_ms" + - "delivery_timeout_ms" + - "send_buffer_bytes" + - "receive_buffer_bytes" + additionalProperties: true + properties: + bootstrap_servers: + title: "Bootstrap servers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Kafka cluster. The client will make use of all servers\ + \ irrespective of which servers are specified here for bootstrapping—this\ + \ list only impacts the initial hosts used to discover the full set of\ + \ servers. This list should be in the form host1:port1,host2:port2,....\ + \ Since these servers are just used for the initial connection to discover\ + \ the full cluster membership (which may change dynamically), this list\ + \ need not contain the full set of servers (you may want more than one,\ + \ though, in case a server is down)." + type: "string" + examples: + - "kafka-broker1:9092,kafka-broker2:9092" + topic_pattern: + title: "Topic pattern" + description: "Topic pattern in which the records will be sent. You can use\ + \ patterns like '{namespace}' and/or '{stream}' to send the message to\ + \ a specific topic based on these values. Notice that the topic name will\ + \ be transformed to a standard naming convention." + type: "string" + examples: + - "sample.topic" + - "{namespace}.{stream}.sample" + test_topic: + title: "Test topic" + description: "Topic to test if Airbyte can produce messages." + type: "string" + examples: + - "test.topic" + sync_producer: + title: "Sync producer" + description: "Wait synchronously until the record has been sent to Kafka." + type: "boolean" + default: false + protocol: + title: "Protocol" + type: "object" + description: "Protocol used to communicate with brokers." + oneOf: + - title: "PLAINTEXT" + required: + - "security_protocol" + properties: + security_protocol: + type: "string" + enum: + - "PLAINTEXT" + default: "PLAINTEXT" + - title: "SASL PLAINTEXT" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_PLAINTEXT" + default: "SASL_PLAINTEXT" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "PLAIN" + enum: + - "PLAIN" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." 
+ type: "string" + default: "" + airbyte_secret: true + - title: "SASL SSL" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_SSL" + default: "SASL_SSL" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "GSSAPI" + enum: + - "GSSAPI" + - "OAUTHBEARER" + - "SCRAM-SHA-256" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + client_id: + title: "Client ID" + description: "An id string to pass to the server when making requests. The\ + \ purpose of this is to be able to track the source of requests beyond\ + \ just ip/port by allowing a logical application name to be included in\ + \ server-side request logging." + type: "string" + examples: + - "airbyte-producer" + acks: + title: "ACKs" + description: "The number of acknowledgments the producer requires the leader\ + \ to have received before considering a request complete. This controls\ + \ the durability of records that are sent." + type: "string" + default: "1" + enum: + - "0" + - "1" + - "all" + enable_idempotence: + title: "Enable idempotence" + description: "When set to 'true', the producer will ensure that exactly\ + \ one copy of each message is written in the stream. If 'false', producer\ + \ retries due to broker failures, etc., may write duplicates of the retried\ + \ message in the stream." + type: "boolean" + default: false + compression_type: + title: "Compression type" + description: "The compression type for all data generated by the producer." + type: "string" + default: "none" + enum: + - "none" + - "gzip" + - "snappy" + - "lz4" + - "zstd" + batch_size: + title: "Batch size" + description: "The producer will attempt to batch records together into fewer\ + \ requests whenever multiple records are being sent to the same partition." + type: "integer" + examples: + - 16384 + linger_ms: + title: "Linger ms" + description: "The producer groups together any records that arrive in between\ + \ request transmissions into a single batched request." + type: "string" + examples: + - 0 + max_in_flight_requests_per_connection: + title: "Max in flight requests per connection" + description: "The maximum number of unacknowledged requests the client will\ + \ send on a single connection before blocking." + type: "integer" + examples: + - 5 + client_dns_lookup: + title: "Client DNS lookup" + description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ + \ connect to each returned IP address in sequence until a successful connection\ + \ is established. After a disconnection, the next IP is used. Once all\ + \ IPs have been used once, the client resolves the IP(s) from the hostname\ + \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ + \ bootstrap address into a list of canonical names. After the bootstrap\ + \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ + \ attempt to connect to the first IP address returned by the lookup, even\ + \ if the lookup returns multiple IP addresses." 
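# Illustrative fragment only (not part of the generated seed): one way the "protocol"
# oneOf and a few producer settings of the Kafka Destination Spec above could be set.
# The sasl_jaas_config value is a placeholder, and the remaining required fields of
# this spec are omitted here for brevity.
#   bootstrap_servers: "kafka-broker1:9092,kafka-broker2:9092"
#   topic_pattern: "{namespace}.{stream}.sample"
#   protocol:
#     security_protocol: "SASL_PLAINTEXT"
#     sasl_mechanism: "PLAIN"
#     sasl_jaas_config: "<JAAS login context parameters>"
#   acks: "all"
#   compression_type: "none"
#   batch_size: 16384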
+ type: "string" + default: "use_all_dns_ips" + enum: + - "default" + - "use_all_dns_ips" + - "resolve_canonical_bootstrap_servers_only" + - "use_all_dns_ips" + buffer_memory: + title: "Buffer memory" + description: "The total bytes of memory the producer can use to buffer records\ + \ waiting to be sent to the server." + type: "string" + examples: 33554432 + max_request_size: + title: "Max request size" + description: "The maximum size of a request in bytes." + type: "integer" + examples: + - 1048576 + retries: + title: "Retries" + description: "Setting a value greater than zero will cause the client to\ + \ resend any record whose send fails with a potentially transient error." + type: "integer" + examples: + - 2147483647 + socket_connection_setup_timeout_ms: + title: "Socket connection setup timeout" + description: "The amount of time the client will wait for the socket connection\ + \ to be established." + type: "string" + examples: + - 10000 + socket_connection_setup_timeout_max_ms: + title: "Socket connection setup max timeout" + description: "The maximum amount of time the client will wait for the socket\ + \ connection to be established. The connection setup timeout will increase\ + \ exponentially for each consecutive connection failure up to this maximum." + type: "string" + examples: + - 30000 + max_block_ms: + title: "Max block ms" + description: "The configuration controls how long the KafkaProducer's send(),\ + \ partitionsFor(), initTransactions(), sendOffsetsToTransaction(), commitTransaction()\ + \ and abortTransaction() methods will block." + type: "string" + examples: + - 60000 + request_timeout_ms: + title: "Request timeout" + description: "The configuration controls the maximum amount of time the\ + \ client will wait for the response of a request. If the response is not\ + \ received before the timeout elapses the client will resend the request\ + \ if necessary or fail the request if retries are exhausted." + type: "integer" + examples: + - 30000 + delivery_timeout_ms: + title: "Delivery timeout" + description: "An upper bound on the time to report success or failure after\ + \ a call to 'send()' returns." + type: "integer" + examples: + - 120000 + send_buffer_bytes: + title: "Send buffer bytes" + description: "The size of the TCP send buffer (SO_SNDBUF) to use when sending\ + \ data. If the value is -1, the OS default will be used." + type: "integer" + examples: + - 131072 + receive_buffer_bytes: + title: "Receive buffer bytes" + description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ + \ reading data. If the value is -1, the OS default will be used." + type: "integer" + examples: + - 32768 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-csv:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-csv" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "CSV Destination Spec" + type: "object" + required: + - "destination_path" + additionalProperties: false + properties: + destination_path: + description: "Path to the directory where csv files will be written. The\ + \ destination uses the local mount \"/local\" and any data files will\ + \ be placed inside that local mount. 
For more information check out our\ + \ docs" + type: "string" + examples: + - "/local" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-local-json:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-json" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Local Json Destination Spec" + type: "object" + required: + - "destination_path" + additionalProperties: false + properties: + destination_path: + description: "Path to the directory where json files will be written. The\ + \ files will be placed inside that local mount. For more information check\ + \ out our docs" + type: "string" + examples: + - "/json_data" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mssql:0.1.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MS SQL Server Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + - "schema" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 1433 + examples: + - "1433" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"public\"\ + ." + type: "string" + examples: + - "public" + default: "public" + order: 3 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 4 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 5 + ssl_method: + title: "SSL Method" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "ssl_method" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Encrypted (trust server certificate)" + additionalProperties: false + description: "Use the cert provided by the server without verification.\ + \ (For testing purposes only!)" + required: + - "ssl_method" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "encrypted_trust_server_certificate" + default: "encrypted_trust_server_certificate" + - title: "Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "ssl_method" + - "trustStoreName" + - "trustStorePassword" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + hostNameInCertificate: + title: "Host Name In Certificate" + type: "string" + description: "Specifies the host name of the server. 
The value of\ + \ this property must match the subject property of the certificate." + order: 7 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-meilisearch:0.2.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/meilisearch" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MeiliSearch Destination Spec" + type: "object" + required: + - "host" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the MeiliSearch instance" + type: "string" + order: 0 + api_key: + title: "API Key" + airbyte_secret: true + description: "MeiliSearch instance API Key" + type: "string" + order: 1 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mongodb:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mongodb" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MongoDB Destination Spec" + type: "object" + required: + - "database" + - "auth_type" + additionalProperties: true + properties: + instance_type: + description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ + \ Set TLS connection is used by default." + title: "MongoDb instance type" + type: "object" + order: 0 + oneOf: + - title: "Standalone MongoDb Instance" + required: + - "instance" + - "host" + - "port" + properties: + instance: + type: "string" + enum: + - "standalone" + default: "standalone" + host: + title: "Host" + type: "string" + description: "Host of a Mongo database to be replicated." + order: 0 + port: + title: "Port" + type: "integer" + description: "Port of a Mongo database to be replicated." + minimum: 0 + maximum: 65536 + default: 27017 + examples: + - "27017" + order: 1 + tls: + title: "TLS connection" + type: "boolean" + description: "Indicates whether TLS encryption protocol will be used\ + \ to connect to MongoDB. It is recommended to use TLS connection\ + \ if possible. For more information see documentation." + default: false + order: 2 + - title: "Replica Set" + required: + - "instance" + - "server_addresses" + properties: + instance: + type: "string" + enum: + - "replica" + default: "replica" + server_addresses: + title: "Server addresses" + type: "string" + description: "The members of a replica set. Please specify `host`:`port`\ + \ of each member seperated by comma." + examples: + - "host1:27017,host2:27017,host3:27017" + order: 0 + replica_set: + title: "Replica Set" + type: "string" + description: "A replica set name." + order: 1 + - title: "MongoDB Atlas" + additionalProperties: false + required: + - "instance" + - "cluster_url" + properties: + instance: + type: "string" + enum: + - "atlas" + default: "atlas" + cluster_url: + title: "Cluster URL" + type: "string" + description: "URL of a cluster to connect to." + order: 0 + database: + title: "DB Name" + description: "Name of the database." 
+ type: "string" + order: 2 + auth_type: + title: "Authorization type" + type: "object" + description: "Authorization type." + oneOf: + - title: "None" + additionalProperties: false + description: "None." + required: + - "authorization" + type: "object" + properties: + authorization: + type: "string" + const: "none" + - title: "Login/Password" + additionalProperties: false + description: "Login/Password." + required: + - "authorization" + - "username" + - "password" + type: "object" + properties: + authorization: + type: "string" + const: "login/password" + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 1 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 2 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mysql:0.1.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mysql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MySQL Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 3306 + examples: + - "3306" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + order: 5 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." 
+ type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-oracle:0.1.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/oracle" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Oracle Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "sid" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 1521 + examples: + - "1521" + order: 1 + sid: + title: "SID" + description: "SID" + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database. This user must have\ + \ CREATE USER privileges in the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"airbyte\"\ + . In Oracle, schemas and users are the same thing, so the \"user\" parameter\ + \ is used as the login credentials and this is used for the default Airbyte\ + \ message schema." + type: "string" + examples: + - "airbyte" + default: "airbyte" + order: 5 + encryption: + title: "Encryption" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Native Network Ecryption (NNE)" + additionalProperties: false + description: "Native network encryption gives you the ability to encrypt\ + \ database connections, without the configuration overhead of TCP/IP\ + \ and SSL/TLS and without the need to open and listen on different ports." 
+ required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "client_nne" + enum: + - "client_nne" + default: "client_nne" + encryption_algorithm: + type: "string" + description: "This parameter defines the encryption algorithm to be\ + \ used" + title: "Encryption Algorithm" + default: "AES256" + enum: + - "AES256" + - "RC4_56" + - "3DES168" + - title: "TLS Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "encryption_method" + - "ssl_certificate" + properties: + encryption_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + ssl_certificate: + title: "SSL PEM file" + description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ + \ containers frequently used in certificate installations" + type: "string" + airbyte_secret: true + multiline: true + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: false + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-postgres:0.3.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Postgres Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + - "schema" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"public\"\ + ." + type: "string" + examples: + - "public" + default: "public" + order: 3 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 4 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 5 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: false + order: 6 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." 
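# Illustrative example only (not part of the generated seed): a "tunnel_method" object
# matching the SSH Key Authentication option shared by the MSSQL, MySQL, Oracle and
# Postgres destination specs above. Host, user and key values are fabricated.
#   tunnel_method:
#     tunnel_method: "SSH_KEY_AUTH"
#     tunnel_host: "bastion.example.com"
#     tunnel_port: 22
#     tunnel_user: "airbyte"
#     ssh_key: "-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----"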
+ type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-pulsar:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/pulsar" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pulsar Destination Spec" + type: "object" + required: + - "brokers" + - "use_tls" + - "topic_type" + - "topic_tenant" + - "topic_namespace" + - "topic_pattern" + - "compression_type" + - "send_timeout_ms" + - "max_pending_messages" + - "max_pending_messages_across_partitions" + - "batching_enabled" + - "batching_max_messages" + - "batching_max_publish_delay" + - "block_if_queue_full" + additionalProperties: true + properties: + brokers: + title: "Pulsar brokers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Pulsar cluster." + type: "string" + examples: + - "broker1:6650,broker2:6650" + use_tls: + title: "Use TLS" + description: "Whether to use TLS encryption on the connection." + type: "boolean" + default: false + topic_type: + title: "Topic type" + description: "It identifies type of topic. Pulsar supports two kind of topics:\ + \ persistent and non-persistent. In persistent topic, all messages are\ + \ durably persisted on disk (that means on multiple disks unless the broker\ + \ is standalone), whereas non-persistent topic does not persist message\ + \ into storage disk." + type: "string" + default: "persistent" + enum: + - "persistent" + - "non-persistent" + topic_tenant: + title: "Topic tenant" + description: "The topic tenant within the instance. Tenants are essential\ + \ to multi-tenancy in Pulsar, and spread across clusters." + type: "string" + default: "public" + examples: + - "public" + topic_namespace: + title: "Topic namespace" + description: "The administrative unit of the topic, which acts as a grouping\ + \ mechanism for related topics. Most topic configuration is performed\ + \ at the namespace level. Each tenant has one or multiple namespaces." + type: "string" + default: "default" + examples: + - "default" + topic_pattern: + title: "Topic pattern" + description: "Topic pattern in which the records will be sent. 
You can use\ + \ patterns like '{namespace}' and/or '{stream}' to send the message to\ + \ a specific topic based on these values. Notice that the topic name will\ + \ be transformed to a standard naming convention." + type: "string" + examples: + - "sample.topic" + - "{namespace}.{stream}.sample" + topic_test: + title: "Test topic" + description: "Topic to test if Airbyte can produce messages." + type: "string" + examples: + - "test.topic" + producer_name: + title: "Producer name" + description: "Name for the producer. If not filled, the system will generate\ + \ a globally unique name which can be accessed with." + type: "string" + examples: + - "airbyte-producer" + producer_sync: + title: "Sync producer" + description: "Wait synchronously until the record has been sent to Pulsar." + type: "boolean" + default: false + compression_type: + title: "Compression type" + description: "Compression type for the producer." + type: "string" + default: "NONE" + enum: + - "NONE" + - "LZ4" + - "ZLIB" + - "ZSTD" + - "SNAPPY" + send_timeout_ms: + title: "Message send timeout" + description: "If a message is not acknowledged by a server before the send-timeout\ + \ expires, an error occurs (in ms)." + type: "integer" + default: 30000 + max_pending_messages: + title: "Max pending messages" + description: "The maximum size of a queue holding pending messages." + type: "integer" + default: 1000 + max_pending_messages_across_partitions: + title: "Max pending messages across partitions" + description: "The maximum number of pending messages across partitions." + type: "integer" + default: 50000 + batching_enabled: + title: "Enable batching" + description: "Control whether automatic batching of messages is enabled\ + \ for the producer." + type: "boolean" + default: true + batching_max_messages: + title: "Batching max messages" + description: "Maximum number of messages permitted in a batch." + type: "integer" + default: 1000 + batching_max_publish_delay: + title: "Batching max publish delay" + description: " Time period in milliseconds within which the messages sent\ + \ will be batched." + type: "integer" + default: 1 + block_if_queue_full: + title: "Block if queue is full" + description: "If the send operation should block when the outgoing message\ + \ queue is full." + type: "boolean" + default: false + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-redshift:0.3.19" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Redshift Destination Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "password" + - "schema" + additionalProperties: true + properties: + host: + description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ + \ region and end with .redshift.amazonaws.com)" + type: "string" + title: "Host" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5439 + examples: + - "5439" + title: "Port" + username: + description: "Username to use to access the database." + type: "string" + title: "Username" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + database: + description: "Name of the database." 
+ type: "string" + title: "Database" + schema: + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. Unless specifically configured, the usual value\ + \ for this field is \"public\"." + type: "string" + examples: + - "public" + default: "public" + title: "Default Schema" + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the staging S3 bucket to use if utilising a COPY\ + \ strategy. COPY is recommended for production workloads for better speed\ + \ and scalability. See AWS docs for more details." + examples: + - "airbyte.staging" + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 staging bucket to use if utilising a\ + \ copy strategy." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + access_key_id: + type: "string" + description: "The Access Key Id granting allow one to access the above S3\ + \ staging bucket. Airbyte requires Read and Write permissions to the given\ + \ bucket." + title: "S3 Key Id" + airbyte_secret: true + secret_access_key: + type: "string" + description: "The corresponding secret to the above access key id." + title: "S3 Access Key" + airbyte_secret: true + part_size: + type: "integer" + minimum: 10 + maximum: 100 + examples: + - "10" + description: "Optional. Increase this if syncing tables larger than 100GB.\ + \ Only relevant for COPY. Files are streamed to S3 in parts. This determines\ + \ the size of each part, in MBs. As S3 has a limit of 10,000 parts per\ + \ file, part size affects the table size. This is 10MB by default, resulting\ + \ in a default limit of 100GB tables. Note, a larger part size will result\ + \ in larger memory requirements. A rule of thumb is to multiply the part\ + \ size by 10 to get the memory requirement. Modify this with care." + title: "Stream Part Size" + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-s3:0.1.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "S3 Destination Spec" + type: "object" + required: + - "s3_bucket_name" + - "s3_bucket_path" + - "s3_bucket_region" + - "access_key_id" + - "secret_access_key" + - "format" + additionalProperties: false + properties: + s3_endpoint: + title: "Endpoint" + type: "string" + default: "" + description: "This is your S3 endpoint url.(if you are working with AWS\ + \ S3, just leave empty)." + examples: + - "http://localhost:9000" + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the S3 bucket." + examples: + - "airbyte_sync" + s3_bucket_path: + description: "Directory under the S3 bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 bucket." 
+ enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + - "us-gov-east-1" + - "us-gov-west-1" + access_key_id: + type: "string" + description: "The access key id to access the S3 bucket. Airbyte requires\ + \ Read and Write permissions to the given bucket." + title: "S3 Key Id" + airbyte_secret: true + examples: + - "A012345678910EXAMPLE" + secret_access_key: + type: "string" + description: "The corresponding secret to the access key id." + title: "S3 Access Key" + airbyte_secret: true + examples: + - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "Avro: Apache Avro" + required: + - "format_type" + - "compression_codec" + properties: + format_type: + type: "string" + enum: + - "Avro" + default: "Avro" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data. Default\ + \ to no compression." + type: "object" + oneOf: + - title: "no compression" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "no compression" + default: "no compression" + - title: "Deflate" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "Deflate" + default: "Deflate" + compression_level: + title: "Deflate level" + description: "0: no compression & fastest, 9: best compression\ + \ & slowest." + type: "integer" + default: 0 + minimum: 0 + maximum: 9 + - title: "bzip2" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "bzip2" + default: "bzip2" + - title: "xz" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "xz" + default: "xz" + compression_level: + title: "Compression level" + description: "See here for details." + type: "integer" + default: 6 + minimum: 0 + maximum: 9 + - title: "zstandard" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "zstandard" + default: "zstandard" + compression_level: + title: "Compression level" + description: "Negative levels are 'fast' modes akin to lz4 or\ + \ snappy, levels above 9 are generally for archival purposes,\ + \ and levels above 18 use a lot of memory." + type: "integer" + default: 3 + minimum: -5 + maximum: 22 + include_checksum: + title: "Include checksum" + description: "If true, include a checksum with each data block." + type: "boolean" + default: false + - title: "snappy" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "snappy" + default: "snappy" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ uploading bigger files and improve the speed, but consume more\ + \ memory. Allowed values: min=5MB, max=525MB. Default: 5MB."
+ type: "integer" + default: 5 + examples: + - 5 + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + enum: + - "CSV" + default: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." + default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "JSONL" + default: "JSONL" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "Parquet: Columnar Storage" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "Parquet" + default: "Parquet" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data pages." + type: "string" + enum: + - "UNCOMPRESSED" + - "SNAPPY" + - "GZIP" + - "LZO" + - "BROTLI" + - "LZ4" + - "ZSTD" + default: "UNCOMPRESSED" + block_size_mb: + title: "Block Size (Row Group Size) (MB)" + description: "This is the size of a row group being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will improve\ + \ the IO when reading, but consume more memory when writing. Default:\ + \ 128 MB." + type: "integer" + default: 128 + examples: + - 128 + max_padding_size_mb: + title: "Max Padding Size (MB)" + description: "Maximum size allowed as padding to align row groups.\ + \ This is also the minimum size of a row group. Default: 8 MB." + type: "integer" + default: 8 + examples: + - 8 + page_size_kb: + title: "Page Size (KB)" + description: "The page size is for compression. A block is composed\ + \ of pages. A page is the smallest unit that must be read fully\ + \ to access a single record. If this value is too small, the compression\ + \ will deteriorate. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_page_size_kb: + title: "Dictionary Page Size (KB)" + description: "There is one dictionary page per column per row group\ + \ when dictionary encoding is used. The dictionary page size works\ + \ like the page size but for dictionary. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_encoding: + title: "Dictionary Encoding" + description: "Default: true." 
+ type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-snowflake:0.3.16" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snowflake Destination Spec" + type: "object" + required: + - "host" + - "role" + - "warehouse" + - "database" + - "schema" + - "username" + - "password" + additionalProperties: true + properties: + host: + description: "Host domain of the snowflake instance (must include the account,\ + \ region, cloud environment, and end with snowflakecomputing.com)." + examples: + - "accountname.us-east-2.aws.snowflakecomputing.com" + type: "string" + title: "Host" + order: 0 + role: + description: "The role you created for Airbyte to access Snowflake." + examples: + - "AIRBYTE_ROLE" + type: "string" + title: "Role" + order: 1 + warehouse: + description: "The warehouse you created for Airbyte to sync data into." + examples: + - "AIRBYTE_WAREHOUSE" + type: "string" + title: "Warehouse" + order: 2 + database: + description: "The database you created for Airbyte to sync data into." + examples: + - "AIRBYTE_DATABASE" + type: "string" + title: "Database" + order: 3 + schema: + description: "The default Snowflake schema tables are written to if the\ + \ source does not specify a namespace." + examples: + - "AIRBYTE_SCHEMA" + type: "string" + title: "Default Schema" + order: 4 + username: + description: "The username you created to allow Airbyte to access the database." + examples: + - "AIRBYTE_USER" + type: "string" + title: "Username" + order: 5 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + order: 6 + loading_method: + type: "object" + title: "Loading Method" + description: "Loading method used to send data to Snowflake." + order: 7 + oneOf: + - title: "Standard Inserts" + additionalProperties: false + description: "Uses
INSERT
statements to send batches of records\ + \ to Snowflake. Easiest (no setup) but not recommended for large production\ + \ workloads due to slow speed." + required: + - "method" + properties: + method: + type: "string" + enum: + - "Standard" + default: "Standard" + - title: "AWS S3 Staging" + additionalProperties: false + description: "Writes large batches of records to a file, uploads the file\ + \ to S3, then uses
COPY INTO table
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "s3_bucket_name" + - "access_key_id" + - "secret_access_key" + properties: + method: + type: "string" + enum: + - "S3 Staging" + default: "S3 Staging" + order: 0 + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the staging S3 bucket. Airbyte will write\ + \ files to this bucket and read them via
COPY
statements\ + \ on Snowflake." + examples: + - "airbyte.staging" + order: 1 + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 staging bucket to use if utilising\ + \ a copy strategy." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "eu-south-1" + - "eu-north-1" + - "sa-east-1" + - "me-south-1" + order: 2 + access_key_id: + type: "string" + description: "The Access Key Id granting access to the above\ + \ S3 staging bucket. Airbyte requires Read and Write permissions\ + \ to the given bucket." + title: "S3 Key Id" + airbyte_secret: true + order: 3 + secret_access_key: + type: "string" + description: "The corresponding secret to the above access key id." + title: "S3 Access Key" + airbyte_secret: true + order: 4 + - title: "GCS Staging" + additionalProperties: false + description: "Writes large batches of records to a file, uploads the file\ + \ to GCS, then uses
COPY INTO table
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "project_id" + - "bucket_name" + - "credentials_json" + properties: + method: + type: "string" + enum: + - "GCS Staging" + default: "GCS Staging" + order: 0 + project_id: + title: "GCP Project ID" + type: "string" + description: "The name of the GCP project ID for your credentials." + examples: + - "my-project" + order: 1 + bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the staging GCS bucket. Airbyte will write\ + \ files to this bucket and read them via
COPY
statements\ + \ on Snowflake." + examples: + - "airbyte-staging" + order: 2 + credentials_json: + title: "Google Application Credentials" + type: "string" + description: "The contents of the JSON key file that has read/write\ + \ permissions to the staging GCS bucket. You will separately need\ + \ to grant bucket access to your Snowflake GCP service account.\ + \ See the GCP docs for more information on how to generate a JSON key\ + \ for your service account." + airbyte_secret: true + multiline: true + order: 3 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 3e91d0e60c8bb..4597a82ddecbc 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -131,14 +131,14 @@ - name: Facebook Marketing sourceDefinitionId: e7778cfc-e97c-4458-9ecb-b4f2bba8946c dockerRepository: airbyte/source-facebook-marketing - dockerImageTag: 0.2.21 + dockerImageTag: 0.2.22 documentationUrl: https://docs.airbyte.io/integrations/sources/facebook-marketing icon: facebook.svg sourceType: api - name: Facebook Pages sourceDefinitionId: 010eb12f-837b-4685-892d-0a39f76a98f5 dockerRepository: airbyte/source-facebook-pages - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://hub.docker.com/r/airbyte/source-facebook-pages icon: facebook.svg sourceType: api @@ -156,6 +156,12 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/freshdesk icon: freshdesk.svg sourceType: api +- name: Freshsales + sourceDefinitionId: eca08d79-7b92-4065-b7f3-79c14836ebe7 + dockerRepository: airbyte/source-freshsales + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/freshsales + sourceType: api - name: Freshservice sourceDefinitionId: 9bb85338-ea95-4c93-b267-6be89125b267 dockerRepository: airbyte/source-freshservice @@ -192,7 +198,7 @@ - name: Google Directory sourceDefinitionId: d19ae824-e289-4b14-995a-0632eb46d246 dockerRepository: airbyte/source-google-directory - dockerImageTag: 0.1.5 + dockerImageTag: 0.1.8 documentationUrl: https://docs.airbyte.io/integrations/sources/google-directory sourceType: api - name: Google Search Console @@ -217,7 +223,7 @@ - name: Greenhouse sourceDefinitionId: 59f1e50a-331f-4f09-b3e8-2e8d4d355f44 dockerRepository: airbyte/source-greenhouse - dockerImageTag: 0.2.5 + dockerImageTag: 0.2.6 documentationUrl: https://docs.airbyte.io/integrations/sources/greenhouse icon: greenhouse.svg sourceType: api @@ -230,7 +236,7 @@ - name: Hubspot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.23 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api @@ -256,7 +262,7 @@ - name: Iterable sourceDefinitionId: 2e875208-0c0b-4ee4-9e92-1cb3156ea799 dockerRepository: airbyte/source-iterable - dockerImageTag: 0.1.9 + dockerImageTag: 0.1.11 documentationUrl: https://docs.airbyte.io/integrations/sources/iterable sourceType: api - name: Jira @@ -328,10 +334,16 @@ - name: Mixpanel sourceDefinitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a dockerRepository: airbyte/source-mixpanel - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.3 documentationUrl: 
https://docs.airbyte.io/integrations/sources/mixpanel icon: mixpanel.svg sourceType: api +- name: Monday + sourceDefinitionId: 80a54ea2-9959-4040-aac1-eee42423ec9b + dockerRepository: airbyte/source-monday + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/monday + sourceType: api - name: MongoDb sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e dockerRepository: airbyte/source-mongodb-v2 @@ -342,14 +354,14 @@ - name: MySQL sourceDefinitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad dockerRepository: airbyte/source-mysql - dockerImageTag: 0.4.8 + dockerImageTag: 0.4.9 documentationUrl: https://docs.airbyte.io/integrations/sources/mysql icon: mysql.svg sourceType: database - name: Okta sourceDefinitionId: 1d4fdb25-64fc-4569-92da-fcdca79a8372 dockerRepository: airbyte/source-okta - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/okta sourceType: api - name: OneSignal @@ -405,7 +417,7 @@ - name: Postgres sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 dockerRepository: airbyte/source-postgres - dockerImageTag: 0.3.11 + dockerImageTag: 0.3.13 documentationUrl: https://docs.airbyte.io/integrations/sources/postgres icon: postgresql.svg sourceType: database @@ -424,7 +436,7 @@ - name: Recharge sourceDefinitionId: 45d2e135-2ede-49e1-939f-3e3ec357a65e dockerRepository: airbyte/source-recharge - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/recharge sourceType: api - name: Recurly @@ -462,7 +474,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api @@ -476,7 +488,7 @@ - name: Shopify sourceDefinitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 dockerRepository: airbyte/source-shopify - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.22 documentationUrl: https://docs.airbyte.io/integrations/sources/shopify sourceType: api - name: Short.io @@ -524,7 +536,7 @@ - name: Stripe sourceDefinitionId: e094cb9a-26de-4645-8761-65c0c425d1de dockerRepository: airbyte/source-stripe - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.22 documentationUrl: https://docs.airbyte.io/integrations/sources/stripe icon: stripe.svg sourceType: api @@ -586,7 +598,7 @@ - name: Zendesk Support sourceDefinitionId: 79c1aa37-dae3-42ae-b333-d1c105477715 dockerRepository: airbyte/source-zendesk-support - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/zendesk-support icon: zendesk.svg sourceType: api @@ -596,6 +608,11 @@ dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/sources/zendesk-talk sourceType: api +- sourceDefinitionId: cdaf146a-9b75-49fd-9dd2-9d64a0bb4781 + name: Sentry + dockerRepository: airbyte/source-sentry + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/sentry - name: Zoom sourceDefinitionId: aea2fd0d-377d-465e-86c0-4fdc4f688e51 dockerRepository: airbyte/source-zoom-singer diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml new file mode 100644 index 0000000000000..e526ee27f5c3f --- /dev/null +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -0,0 +1,6016 @@ +# This file is generated by 
io.airbyte.config.specs.SeedConnectorSpecGenerator. +# Do NOT edit this file directly. See generator class for more details. +--- +- dockerImage: "airbyte/source-aws-cloudtrail:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/aws-cloudtrail" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Aws CloudTrail Spec" + type: "object" + required: + - "aws_key_id" + - "aws_secret_key" + - "aws_region_name" + - "start_date" + additionalProperties: true + properties: + aws_key_id: + type: "string" + description: "Specifies an AWS access key associated with an IAM user or\ + \ role." + airbyte_secret: true + aws_secret_key: + type: "string" + description: "Specifies the secret key associated with the access key. This\ + \ is essentially the 'password' for the access key." + airbyte_secret: true + aws_region_name: + type: "string" + description: "The default AWS Region to use, for example, us-west-1 or us-west-2.\ + \ When specifying a Region inline during client initialization, this property\ + \ is named region_name." + start_date: + type: "string" + description: "The date from which you would like to replicate data. Data in CloudTrail\ + \ is available for the last 90 days only. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amazon-ads:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-ads" + connectionSpecification: + title: "Amazon Ads Spec" + type: "object" + properties: + client_id: + title: "Client Id" + description: "Oauth client id How to create your Login with Amazon" + name: "Client ID" + type: "string" + client_secret: + title: "Client Secret" + description: "Oauth client secret How to create your Login with Amazon" + name: "Client secret" + airbyte_secret: true + type: "string" + scope: + title: "Scope" + description: "By default it's advertising::campaign_management, but customers\ + \ may need to set scope to cpc_advertising:campaign_management." + default: "advertising::campaign_management" + name: "Client scope" + examples: + - "cpc_advertising:campaign_management" + type: "string" + refresh_token: + title: "Refresh Token" + description: "Oauth 2.0 refresh_token, read details here" + name: "Oauth refresh token" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "Start date for collecting reports, should not be more than\ + \ 60 days in the past. 
In YYYY-MM-DD format" + name: "Start date" + examples: + - "2022-10-10" + - "2022-10-22" + type: "string" + region: + description: "Region to pull data from (EU/NA/FE/SANDBOX)" + default: "NA" + name: "Region" + title: "AmazonAdsRegion" + enum: + - "NA" + - "EU" + - "FE" + - "SANDBOX" + type: "string" + profiles: + title: "Profiles" + description: "profile Ids you want to fetch data for" + name: "Profile Ids" + type: "array" + items: + type: "integer" + required: + - "client_id" + - "client_secret" + - "refresh_token" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" + changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" + connectionSpecification: + title: "Amazon Seller Partner Spec" + type: "object" + properties: + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + refresh_token: + title: "Refresh Token" + description: "The refresh token used obtained via authorization (can be\ + \ passed to the client instead)" + airbyte_secret: true + type: "string" + lwa_app_id: + title: "Lwa App Id" + description: "Your login with amazon app id" + airbyte_secret: true + type: "string" + lwa_client_secret: + title: "Lwa Client Secret" + description: "Your login with amazon client secret" + airbyte_secret: true + type: "string" + aws_access_key: + title: "Aws Access Key" + description: "AWS user access key" + airbyte_secret: true + type: "string" + aws_secret_key: + title: "Aws Secret Key" + description: "AWS user secret key" + airbyte_secret: true + type: "string" + role_arn: + title: "Role Arn" + description: "The role's arn (needs permission to 'Assume Role' STS)" + airbyte_secret: true + type: "string" + aws_environment: + title: "AWSEnvironment" + description: "An enumeration." + enum: + - "PRODUCTION" + - "SANDBOX" + type: "string" + region: + title: "AWSRegion" + description: "An enumeration." + enum: + - "AE" + - "DE" + - "PL" + - "EG" + - "ES" + - "FR" + - "IN" + - "IT" + - "NL" + - "SA" + - "SE" + - "TR" + - "UK" + - "AU" + - "JP" + - "SG" + - "US" + - "BR" + - "CA" + - "MX" + - "GB" + type: "string" + required: + - "replication_start_date" + - "refresh_token" + - "lwa_app_id" + - "lwa_client_secret" + - "aws_access_key" + - "aws_secret_key" + - "role_arn" + - "aws_environment" + - "region" + definitions: + AWSEnvironment: + title: "AWSEnvironment" + description: "An enumeration." + enum: + - "PRODUCTION" + - "SANDBOX" + type: "string" + AWSRegion: + title: "AWSRegion" + description: "An enumeration." 
+ enum: + - "AE" + - "DE" + - "PL" + - "EG" + - "ES" + - "FR" + - "IN" + - "IT" + - "NL" + - "SA" + - "SE" + - "TR" + - "UK" + - "AU" + - "JP" + - "SG" + - "US" + - "BR" + - "CA" + - "MX" + - "GB" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amplitude:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amplitude" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Amplitude Spec" + type: "object" + required: + - "api_key" + - "secret_key" + - "start_date" + additionalProperties: false + properties: + api_key: + type: "string" + description: "This is the project’s API key, used for calling Amplitude’\ + s APIs" + airbyte_secret: true + secret_key: + type: "string" + description: "This is the project's secret key, which is also used for calling\ + \ Amplitude’s APIs" + airbyte_secret: true + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-01-25T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-apify-dataset:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/apify-dataset" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Apify Dataset Spec" + type: "object" + required: + - "datasetId" + additionalProperties: false + properties: + datasetId: + type: "string" + description: "ID of the dataset you would like to load to Airbyte." + clean: + type: "boolean" + description: "If set to true, only clean items will be downloaded from the\ + \ dataset. See description of what clean means in Apify API docs. If not sure, set clean to false." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-appstore-singer:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/appstore" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Appstore Singer Spec" + type: "object" + required: + - "key_id" + - "private_key" + - "issuer_id" + - "vendor" + - "start_date" + additionalProperties: false + properties: + key_id: + type: "string" + description: "Key_id is the API key you use to connect to appstore's API." + private_key: + type: "string" + description: "Private_key is the contents of the key file you use to connect to appstore's API." + airbyte_secret: true + multiline: true + issuer_id: + type: "string" + description: "Issuer_id is used to generate the credentials to connect to appstore's\ + \ API." + vendor: + type: "string" + description: "This is the Apple ID of your account." + start_date: + type: "string" + description: "Date from which to start pulling data." 
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-asana:0.1.3" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Asana Spec" + type: "object" + additionalProperties: true + properties: + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate to Github" + type: "object" + oneOf: + - type: "object" + title: "Authenticate with Personal Access Token" + required: + - "personal_access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "PAT Credentials" + const: "PAT Credentials" + personal_access_token: + type: "string" + title: "Personal Access Token" + description: "Asana Personal Access Token (generate yours here)." + airbyte_secret: true + - type: "object" + title: "Authenticate via Asana (Oauth)" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "OAuth Credentials" + const: "OAuth Credentials" + client_id: + type: "string" + title: "" + description: "" + airbyte_secret: true + client_secret: + type: "string" + title: "" + description: "" + airbyte_secret: true + refresh_token: + type: "string" + title: "" + description: "" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "1" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-bamboo-hr:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bamboo-hr" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Bamboo HR Spec" + type: "object" + required: + - "subdomain" + - "api_key" + additionalProperties: false + properties: + subdomain: + type: "string" + description: "Sub Domain of bamboo hr" + api_key: + type: "string" + description: "Api key of bamboo hr" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-bigcommerce:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bigcommerce" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigCommerce Source CDK Specifications" + type: "object" + required: + - "start_date" + - "store_hash" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + store_hash: + type: "string" + description: "The hash code of the store. For https://api.bigcommerce.com/stores/HASH_CODE/v3/,\ + \ The store's hash code is 'HASH_CODE'." + access_token: + type: "string" + description: "The API Access Token." 
+ airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-bigquery:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/source/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Source Spec" + type: "object" + required: + - "project_id" + - "credentials_json" + additionalProperties: false + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "The BigQuery Dataset ID to look for tables to replicate from." + title: "Default Dataset ID" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs\ + \ if you need help generating this key." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: [] + supported_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-bing-ads:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bing-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Bing Ads Spec" + type: "object" + required: + - "accounts" + - "client_id" + - "client_secret" + - "customer_id" + - "developer_token" + - "refresh_token" + - "user_id" + - "reports_start_date" + - "hourly_reports" + - "daily_reports" + - "weekly_reports" + - "monthly_reports" + additionalProperties: false + properties: + accounts: + title: "Accounts" + type: "object" + description: "Account selection strategy." + oneOf: + - title: "All accounts assigned to your user" + additionalProperties: false + description: "Fetch data for all available accounts." + required: + - "selection_strategy" + properties: + selection_strategy: + type: "string" + enum: + - "all" + const: "all" + - title: "Subset of your accounts" + additionalProperties: false + description: "Fetch data for subset of account ids." + required: + - "ids" + - "selection_strategy" + properties: + selection_strategy: + type: "string" + enum: + - "subset" + const: "subset" + ids: + type: "array" + description: "List of accounts from which data will be fetched." + items: + type: "string" + minItems: 1 + uniqueItems: true + client_id: + type: "string" + description: "ID of your Microsoft Advertising client application." + airbyte_secret: true + client_secret: + type: "string" + description: "Secret of your Microsoft Advertising client application." + airbyte_secret: true + customer_id: + type: "string" + description: "User's customer ID." + developer_token: + type: "string" + description: "Developer token associated with user." + airbyte_secret: true + refresh_token: + type: "string" + description: "The long-lived Refresh token received via grant_type=refresh_token\ + \ request." + airbyte_secret: true + user_id: + type: "string" + description: "Unique user identifier." + reports_start_date: + type: "string" + format: "date" + default: "2020-01-01" + description: "From which date perform initial sync for report related streams.\ + \ In YYYY-MM-DD format" + hourly_reports: + title: "Hourly reports" + type: "boolean" + description: "The report data will be aggregated by each hour of the day." 
+ default: false + daily_reports: + title: "Daily reports" + type: "boolean" + description: "The report data will be aggregated by each day." + default: false + weekly_reports: + title: "Weekly reports" + type: "boolean" + description: "The report data will be aggregated by each week running from\ + \ Sunday through Saturday." + default: false + monthly_reports: + title: "Monthly reports" + type: "boolean" + description: "The report data will be aggregated by each month." + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-braintree:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/braintree" + connectionSpecification: + title: "Braintree Spec" + type: "object" + properties: + merchant_id: + title: "Merchant Id" + description: "Merchant ID is the unique identifier for entire gateway account." + name: "Merchant ID" + type: "string" + public_key: + title: "Public Key" + description: "This is your user-specific public identifier for Braintree." + name: "Public key" + type: "string" + private_key: + title: "Private Key" + description: "This is your user-specific private identifier." + name: "Private Key" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for Braintree\ + \ API for UTC timezone, All data generated after this date will be replicated." + name: "Start date" + examples: + - "2020" + - "2020-12-30" + - "2020-11-22 20:20:05" + type: "string" + format: "date-time" + environment: + description: "Environment specifies where the data will come from." + name: "Environment" + examples: + - "sandbox" + - "production" + - "qa" + - "development" + allOf: + - $ref: "#/definitions/Environment" + required: + - "merchant_id" + - "public_key" + - "private_key" + - "environment" + definitions: + Environment: + title: "Environment" + description: "An enumeration." + enum: + - "Development" + - "Sandbox" + - "Qa" + - "Production" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-cart:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/cart" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cart Spec" + type: "object" + required: + - "access_token" + - "start_date" + - "store_name" + additionalProperties: true + properties: + access_token: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." + store_name: + type: "string" + description: "Store name. All API URLs start with https://[mystorename.com]/api/v1/,\ + \ where [mystorename.com] is the domain name of your store." 
+ start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-01-01T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-chargebee:0.1.4" + spec: + documentationUrl: "https://apidocs.chargebee.com/docs/api" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Chargebee Spec" + type: "object" + required: + - "site" + - "site_api_key" + - "start_date" + - "product_catalog" + additionalProperties: false + properties: + site: + type: "string" + title: "Site" + description: "The site prefix for your Chargebee instance." + examples: + - "airbyte-test" + site_api_key: + type: "string" + title: "API Key" + description: "The API key from your Chargebee instance." + examples: + - "test_3yzfanAXF66USdWC9wQcM555DQJkSYoppu" + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-01-25T00:00:00Z" + product_catalog: + title: "Product Catalog" + type: "string" + description: "Product Catalog version of your Chargebee site. Instructions\ + \ on how to find your version you may find here under `API Version` section." + enum: + - "1.0" + - "2.0" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-clickhouse:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ClickHouse Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + description: "Host Endpoint of the Clickhouse Cluster" + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + database: + description: "Name of the database." + type: "string" + examples: + - "default" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." 
+ type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-close-com:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/close-com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Close.com Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Close.com API key (usually starts with 'api_'; find yours\ + \ here)." + airbyte_secret: true + start_date: + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-cockroachdb:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cockroach Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." 
+ type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "Connect using SSL" + description: "Encrypt client/server communications for increased security." + type: "boolean" + default: false + order: 5 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-delighted:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Delighted Spec" + type: "object" + required: + - "since" + - "api_key" + additionalProperties: false + properties: + since: + type: "integer" + description: "An Unix timestamp to retrieve records created on or after\ + \ this time." + examples: + - 1625328167 + api_key: + type: "string" + description: "A Delighted API key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-dixa:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/dixa" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Dixa Spec" + type: "object" + required: + - "api_token" + - "start_date" + additionalProperties: false + properties: + api_token: + type: "string" + description: "Dixa API token" + airbyte_secret: true + start_date: + type: "string" + description: "The connector pulls records updated from this date onwards." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "YYYY-MM-DD" + batch_size: + type: "integer" + description: "Number of days to batch into one request. Max 31." + pattern: "^[0-9]{1,2}$" + examples: + - 1 + - 31 + default: 31 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-drift:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/drift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Drift Spec" + type: "object" + required: + - "access_token" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Drift Access Token. See the docs for more information on how to generate this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-exchange-rates:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/exchangeratesapi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ratesapi.io Source Spec" + type: "object" + required: + - "start_date" + - "access_key" + additionalProperties: false + properties: + start_date: + type: "string" + description: "Start getting data from that date." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "YYYY-MM-DD" + access_key: + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." + airbyte_secret: true + base: + type: "string" + description: "ISO reference currency. See here. 
Free plan doesn't support Source Currency Switching, default\ + \ base currency is EUR" + examples: + - "EUR" + - "USD" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-facebook-marketing:0.2.22" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" + changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" + connectionSpecification: + title: "Source Facebook Marketing" + type: "object" + properties: + account_id: + title: "Account Id" + description: "The Facebook Ad account ID to use when pulling data from the\ + \ Facebook Marketing API." + type: "string" + access_token: + title: "Access Token" + description: "The value of the access token generated. See the docs\ + \ for more information" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for AdCreatives\ + \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ after this date will be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + format: "date-time" + end_date: + title: "End Date" + description: "The date until which you'd like to replicate data for AdCreatives\ + \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ between start_date and this date will be replicated. Not setting this\ + \ option will result in always syncing the latest data." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-26T00:00:00Z" + type: "string" + format: "date-time" + include_deleted: + title: "Include Deleted" + description: "Include data from deleted campaigns, ads, and adsets." + default: false + type: "boolean" + insights_lookback_window: + title: "Insights Lookback Window" + description: "The attribution window for the actions" + default: 28 + minimum: 0 + maximum: 28 + type: "integer" + insights_days_per_job: + title: "Insights Days Per Job" + description: "Number of days to sync in one job. The more data you have\ + \ - the smaller you want this parameter to be." 
+ default: 7 + minimum: 1 + maximum: 30 + type: "integer" + custom_insights: + title: "Custom Insights" + description: "A list which contains insights entries; each entry must have\ + \ a name and can contain fields, breakdowns or action_breakdowns." + type: "array" + items: + title: "InsightConfig" + type: "object" + properties: + name: + title: "Name" + description: "The name value of insight" + type: "string" + fields: + title: "Fields" + description: "A list of chosen fields for fields parameter" + default: [] + type: "array" + items: + type: "string" + breakdowns: + title: "Breakdowns" + description: "A list of chosen breakdowns for breakdowns" + default: [] + type: "array" + items: + type: "string" + action_breakdowns: + title: "Action Breakdowns" + description: "A list of chosen action_breakdowns for action_breakdowns" + default: [] + type: "array" + items: + type: "string" + required: + - "name" + required: + - "account_id" + - "access_token" + - "start_date" + definitions: + InsightConfig: + title: "InsightConfig" + type: "object" + properties: + name: + title: "Name" + description: "The name value of insight" + type: "string" + fields: + title: "Fields" + description: "A list of chosen fields for fields parameter" + default: [] + type: "array" + items: + type: "string" + breakdowns: + title: "Breakdowns" + description: "A list of chosen breakdowns for breakdowns" + default: [] + type: "array" + items: + type: "string" + action_breakdowns: + title: "Action Breakdowns" + description: "A list of chosen action_breakdowns for action_breakdowns" + default: [] + type: "array" + items: + type: "string" + required: + - "name" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-facebook-pages:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-pages" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Facebook Pages Spec" + type: "object" + required: + - "access_token" + - "page_id" + additionalProperties: true + properties: + access_token: + type: "string" + title: "Page Access Token" + description: "Facebook Page Access Token" + airbyte_secret: true + page_id: + type: "string" + title: "Page ID" + description: "Page ID" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-file:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/file" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "File Source Spec" + type: "object" + additionalProperties: false + required: + - "dataset_name" + - "format" + - "url" + - "provider" + properties: + dataset_name: + type: "string" + description: "Name of the final table where to replicate this file (should\ + \ include only letters, numbers, dashes and underscores)" + format: + type: "string" + enum: + - "csv" + - "json" + - "jsonl" + - "excel" + - "feather" + - "parquet" + default: "csv" + description: "File Format of the file to be replicated (Warning: some formats\ + \ may be experimental, please refer to docs)."
+ reader_options: + type: "string" + description: "This should be a valid JSON string used by each reader/parser\ + \ to provide additional options and tune its behavior" + examples: + - "{}" + - "{'sep': ' '}" + url: + type: "string" + description: "URL path to access the file to be replicated" + provider: + type: "object" + description: "Storage Provider or Location of the file(s) to be replicated." + default: "Public Web" + oneOf: + - title: "HTTPS: Public Web" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "HTTPS" + default: "HTTPS" + - title: "GCS: Google Cloud Storage" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "GCS" + default: "GCS" + service_account_json: + type: "string" + description: "In order to access private Buckets stored on Google\ + \ Cloud, this connector would need a service account json credentials\ + \ with the proper permissions as described here. Please generate the credentials.json\ + \ file and copy/paste its content to this field (expecting JSON\ + \ formats). If accessing publicly available data, this field is\ + \ not necessary." + - title: "S3: Amazon Web Services" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "S3" + default: "S3" + aws_access_key_id: + type: "string" + description: "In order to access private Buckets stored on AWS S3,\ + \ this connector would need credentials with the proper permissions.\ + \ If accessing publicly available data, this field is not necessary." + aws_secret_access_key: + type: "string" + description: "In order to access private Buckets stored on AWS S3,\ + \ this connector would need credentials with the proper permissions.\ + \ If accessing publicly available data, this field is not necessary." + airbyte_secret: true + - title: "AzBlob: Azure Blob Storage" + required: + - "storage" + - "storage_account" + properties: + storage: + type: "string" + enum: + - "AzBlob" + default: "AzBlob" + storage_account: + type: "string" + description: "The globally unique name of the storage account that\ + \ the desired blob sits within. See here for more details." + sas_token: + type: "string" + description: "To access Azure Blob Storage, this connector would need\ + \ credentials with the proper permissions. One option is a SAS (Shared\ + \ Access Signature) token. If accessing publicly available data,\ + \ this field is not necessary." + airbyte_secret: true + shared_key: + type: "string" + description: "To access Azure Blob Storage, this connector would need\ + \ credentials with the proper permissions. One option is a storage\ + \ account shared key (aka account key or access key). If accessing\ + \ publicly available data, this field is not necessary." 
+ airbyte_secret: true + - title: "SSH: Secure Shell" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SSH" + default: "SSH" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "SCP: Secure copy protocol" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SCP" + default: "SCP" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "SFTP: Secure File Transfer Protocol" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SFTP" + default: "SFTP" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "Local Filesystem (limited)" + required: + - "storage" + properties: + storage: + type: "string" + description: "WARNING: Note that local storage URL available for read\ + \ must start with the local mount \"/local/\" at the moment until\ + \ we implement more advanced docker mounting options..." + enum: + - "local" + default: "local" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshdesk:0.2.7" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/freshdesk" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshdesk Spec" + type: "object" + required: + - "domain" + - "api_key" + additionalProperties: false + properties: + domain: + type: "string" + description: "Freshdesk domain" + examples: + - "myaccount.freshdesk.com" + pattern: + - "^[a-zA-Z0-9._-]*\\.freshdesk\\.com$" + api_key: + type: "string" + description: "Freshdesk API Key. See the docs for more information on how to obtain this key." + airbyte_secret: true + requests_per_minute: + title: "Requests per minute" + type: "integer" + description: "Number of requests per minute that this source allowed to\ + \ use." + start_date: + title: "Start date" + description: "Date from which to start pulling data." + format: "date-time" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2020-12-01T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshsales:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshsales Spec" + type: "object" + required: + - "domain_name" + - "api_key" + additionalProperties: false + properties: + domain_name: + type: "string" + description: "Freshsales domain" + examples: + - "mydomain.myfreshworks.com" + api_key: + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." 
+ airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshservice:0.1.0" + spec: + documentationUrl: "https://hub.docker.com/r/airbyte/source-freshservice" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshservice Spec" + type: "object" + required: + - "domain_name" + - "api_key" + - "start_date" + additionalProperties: false + properties: + domain_name: + type: "string" + description: "Freshservice domain" + examples: + - "mydomain.freshservice.com" + api_key: + title: "Api Key" + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." + airbyte_secret: true + start_date: + title: "Replication Start Date" + type: "string" + description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2020-10-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-github:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/github" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Github Source Spec" + type: "object" + required: + - "start_date" + - "repository" + additionalProperties: true + properties: + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate to Github" + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Github (Oauth)" + required: + - "access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "OAuth Credentials" + const: "OAuth Credentials" + access_token: + type: "string" + title: "Access Token" + description: "Oauth access token" + airbyte_secret: true + - type: "object" + title: "Authenticate with Personal Access Token" + required: + - "personal_access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "PAT Credentials" + const: "PAT Credentials" + personal_access_token: + type: "string" + title: "Personal Access Tokens" + description: "Log into Github and then generate a personal access token. To load balance your API quota consumption\ + \ across multiple API tokens, input multiple tokens separated with\ + \ \",\"" + airbyte_secret: true + repository: + type: "string" + examples: + - "airbytehq/airbyte" + - "airbytehq/*" + title: "Github repositories" + description: "Space-delimited list of GitHub repositories/organizations,\ + \ e.g. `airbytehq/airbyte` for single repository and `airbytehq/*` for\ + \ get all repositories from organization" + start_date: + type: "string" + title: "Start date" + description: "The date from which you'd like to replicate data for GitHub\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated. Note that it will be used only in the following\ + \ incremental streams: comments, commits and issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + branch: + type: "string" + title: "Branch" + examples: + - "airbytehq/airbyte/master" + description: "Space-delimited list of GitHub repository branches to pull\ + \ commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified\ + \ for a repository, the default branch will be pulled." 
+ supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-gitlab:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/gitlab" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Gitlab Singer Spec" + type: "object" + required: + - "api_url" + - "private_token" + - "start_date" + additionalProperties: false + properties: + api_url: + type: "string" + examples: + - "gitlab.com" + description: "Please enter your basic URL from Gitlab instance" + private_token: + type: "string" + description: "Log into your Gitlab account and then generate a personal\ + \ Access Token." + airbyte_secret: true + groups: + type: "string" + examples: + - "airbyte.io" + description: "Space-delimited list of groups. e.g. airbyte.io" + projects: + type: "string" + examples: + - "airbyte.io/documentation" + description: "Space-delimited list of projects. e.g. airbyte.io/documentation\ + \ meltano/tap-gitlab" + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Gitlab\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-google-ads:0.1.15" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Ads Spec" + type: "object" + required: + - "credentials" + - "start_date" + - "customer_id" + additionalProperties: true + properties: + credentials: + type: "object" + title: "Google Credentials" + required: + - "developer_token" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + developer_token: + type: "string" + title: "Developer Token" + description: "Developer token granted by Google to use their APIs. More\ + \ instruction on how to find this value in our docs" + airbyte_secret: true + client_id: + type: "string" + title: "Client Id" + description: "Google client id. More instruction on how to find this\ + \ value in our docs" + client_secret: + type: "string" + title: "Client Secret" + description: "Google client secret. More instruction on how to find\ + \ this value in our docs" + airbyte_secret: true + access_token: + type: "string" + title: "Access Token" + description: "Access token generated using developer_token, oauth_client_id,\ + \ and oauth_client_secret. More instruction on how to find this value\ + \ in our docs" + airbyte_secret: true + refresh_token: + type: "string" + title: "Refresh Token" + description: "Refresh token generated using developer_token, oauth_client_id,\ + \ and oauth_client_secret. More instruction on how to find this value\ + \ in our docs" + airbyte_secret: true + customer_id: + title: "Customer Id" + type: "string" + description: "Customer id must be specified as a 10-digit number without\ + \ dashes. 
More instruction on how to find this value in our docs"
+ login_customer_id:
+ type: "string"
+ title: "Login Customer ID"
+ description: "If your access to the customer account is through a manager\
+ \ account, this field is required and must be set to the customer ID of\
+ \ the manager account (10-digit number without dashes). More information\
+ \ about this field can be found here"
+ start_date:
+ type: "string"
+ title: "Start Date"
+ description: "UTC date in the format 2017-01-25. Any data before\
+ \ this date will not be replicated."
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$"
+ examples:
+ - "2017-01-25"
+ conversion_window_days:
+ title: "Conversion Window"
+ type: "integer"
+ description: "Define the historical replication lookback window in days"
+ minimum: 0
+ maximum: 1095
+ default: 14
+ examples:
+ - 14
+ custom_queries:
+ type: "array"
+ title: "Custom GAQL Queries"
+ items:
+ type: "object"
+ properties:
+ query:
+ type: "string"
+ title: "Custom query"
+ description: "A custom defined GAQL query for building the report.\
+ \ Should not contain the segments.date expression, as it is used by incremental\
+ \ streams"
+ examples:
+ - "SELECT segments.ad_destination_type, campaign.advertising_channel_sub_type\
+ \ FROM campaign WHERE campaign.status = 'PAUSED'"
+ table_name:
+ type: "string"
+ title: "Destination table name"
+ description: "The table name in your destination database for the chosen\
+ \ query."
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+ authSpecification:
+ auth_type: "oauth2.0"
+ oauth2Specification:
+ rootObject:
+ - "credentials"
+ oauthFlowInitParameters:
+ - - "client_id"
+ - - "client_secret"
+ - - "developer_token"
+ oauthFlowOutputParameters:
+ - - "access_token"
+ - - "refresh_token"
+- dockerImage: "airbyte/source-google-analytics-v4:0.1.9"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "Google Analytics V4 Spec"
+ type: "object"
+ required:
+ - "view_id"
+ - "start_date"
+ additionalProperties: true
+ properties:
+ view_id:
+ type: "string"
+ title: "View ID"
+ description: "The ID for the Google Analytics View you want to fetch data\
+ \ from. This can be found from the Google Analytics Account Explorer."
+ airbyte_secret: true
+ start_date:
+ type: "string"
+ title: "Start Date"
+ description: "A date in the format YYYY-MM-DD."
+ examples:
+ - "2020-06-01"
+ window_in_days:
+ type: "integer"
+ description: "The number of days for each data chunk, beginning from start_date.\
+ \ The bigger the value, the faster the fetch. (Min=1, i.e. one day; Max=364,\
+ \ i.e. one year)."
+ examples:
+ - 30
+ - 60
+ - 90
+ - 120
+ - 200
+ - 364
+ default: 90
+ custom_reports:
+ title: "Custom Reports"
+ type: "string"
+ description: "A JSON array describing the custom reports you want to sync\
+ \ from GA. Check out the docs to get more information about this field."
+ credentials: + type: "object" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "A access token generated using the above client ID,\ + \ secret and refresh_token" + airbyte_secret: true + - type: "object" + title: "Service Account Key Authentication" + required: + - "credentials_json" + properties: + auth_type: + type: "string" + const: "Service" + enum: + - "Service" + default: "Service" + order: 0 + credentials_json: + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-directory:0.1.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-directory" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Directory Spec" + type: "object" + required: [] + additionalProperties: true + properties: + credentials: + title: "Google Credentials" + description: "Google APIs use the OAuth 2.0 protocol for authentication\ + \ and authorization. 
The Source supports Web server application and Service accounts scenarios" + type: "object" + oneOf: + - title: "Sign in via Google (Oauth)" + description: "For these scenario user only needs to give permission to\ + \ read Google Directory data" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Authentication scenario" + const: "Web server app" + enum: + - "Web server app" + default: "Web server app" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The client ID of developer application" + airbyte_secret: true + client_secret: + title: "Client secret" + type: "string" + description: "The client secret of developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "The token for obtaining new access token" + airbyte_secret: true + - title: "Service account Key" + description: "For these scenario user should obtain service account's\ + \ credentials from the Google API Console and provide delegated email" + type: "object" + required: + - "credentials_json" + - "email" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Authentication scenario" + const: "Service accounts" + enum: + - "Service accounts" + default: "Service accounts" + order: 0 + credentials_json: + type: "string" + title: "Credentials JSON" + description: "The contents of the JSON service account key. See the\ + \ docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + title: "Email" + description: "The email of the user, which has permissions to access\ + \ the Google Workspace Admin APIs." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-google-search-console:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Search Console Spec" + type: "object" + additionalProperties: false + required: + - "site_urls" + - "start_date" + - "authorization" + properties: + site_urls: + type: "array" + items: + type: "string" + description: "Website URLs property; do not include the domain-level property\ + \ in the list" + examples: + - "https://example1.com" + - "https://example2.com" + start_date: + type: "string" + description: "The date from which you'd like to replicate data in the format\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + end_date: + type: "string" + description: "The date from which you'd like to replicate data in the format\ + \ YYYY-MM-DD. 
Must be greater than or equal to the start_date field"
+ examples:
+ - "2021-12-12"
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$"
+ authorization:
+ type: "object"
+ title: "Authentication Type"
+ oneOf:
+ - title: "Authenticate via Google (Oauth)"
+ type: "object"
+ required:
+ - "auth_type"
+ - "client_id"
+ - "client_secret"
+ - "refresh_token"
+ properties:
+ auth_type:
+ type: "string"
+ const: "Client"
+ enum:
+ - "Client"
+ default: "Client"
+ order: 0
+ client_id:
+ title: "Client ID"
+ type: "string"
+ description: "The Client ID of your developer application"
+ airbyte_secret: true
+ client_secret:
+ title: "Client Secret"
+ type: "string"
+ description: "The client secret of your developer application"
+ airbyte_secret: true
+ access_token:
+ title: "Access Token"
+ type: "string"
+ description: "An access token generated using the above client ID\
+ \ and secret"
+ airbyte_secret: true
+ refresh_token:
+ title: "Refresh Token"
+ type: "string"
+ description: "A refresh token generated using the above client ID\
+ \ and secret"
+ airbyte_secret: true
+ - type: "object"
+ title: "Service Account Key Authentication"
+ required:
+ - "auth_type"
+ - "service_account_info"
+ - "email"
+ properties:
+ auth_type:
+ type: "string"
+ const: "Service"
+ enum:
+ - "Service"
+ default: "Service"
+ order: 0
+ service_account_info:
+ title: "Service Account JSON Key"
+ type: "string"
+ description: "The JSON key of the service account to use for authorization"
+ examples:
+ - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\
+ \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }"
+ email:
+ title: "Admin Email"
+ type: "string"
+ description: "The email of the user which has permissions to access\
+ \ the Google Workspace Admin APIs."
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+ authSpecification:
+ auth_type: "oauth2.0"
+ oauth2Specification:
+ rootObject:
+ - "authorization"
+ - "0"
+ oauthFlowInitParameters:
+ - - "client_id"
+ - - "client_secret"
+ oauthFlowOutputParameters:
+ - - "access_token"
+ - - "refresh_token"
+- dockerImage: "airbyte/source-google-sheets:0.2.6"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/google-sheets"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "Google Sheets Spec"
+ type: "object"
+ required:
+ - "spreadsheet_id"
+ additionalProperties: true
+ properties:
+ spreadsheet_id:
+ type: "string"
+ description: "The ID of the spreadsheet to be replicated."
+ credentials: + type: "object" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - title: "Service Account Key Authentication" + type: "object" + required: + - "auth_type" + - "service_account_info" + properties: + auth_type: + type: "string" + const: "Service" + service_account_info: + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - 0 + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-google-workspace-admin-reports:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-workspace-admin-reports" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Directory Spec" + type: "object" + required: + - "credentials_json" + - "email" + additionalProperties: false + properties: + credentials_json: + type: "string" + description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + description: "The email of the user, which has permissions to access the\ + \ Google Workspace Admin APIs." + lookback: + type: "integer" + minimum: 0 + maximum: 180 + description: "Sets the range of time shown in the report. Reports API allows\ + \ from up to 180 days ago. " + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-greenhouse:0.2.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/greenhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Greenhouse Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Greenhouse API Key. See the docs for more information on how to generate this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-harvest:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/harvest" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Harvest Spec" + type: "object" + required: + - "api_token" + - "account_id" + - "replication_start_date" + additionalProperties: false + properties: + api_token: + title: "API Token" + description: "Harvest API Token." + airbyte_secret: true + type: "string" + account_id: + title: "Account ID" + description: "Harvest account ID. 
Required for all Harvest requests in pair\ + \ with API Key" + airbyte_secret: true + type: "string" + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-hubspot:0.1.22" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Hubspot Source Spec" + type: "object" + required: + - "start_date" + - "credentials" + additionalProperties: false + properties: + start_date: + type: "string" + title: "Replication start date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + credentials: + title: "Authentication mechanism" + description: "Choose either to provide the API key or the OAuth2.0 credentials" + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Hubspot (Oauth)" + required: + - "redirect_uri" + - "client_id" + - "client_secret" + - "refresh_token" + - "access_token" + - "credentials_title" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Name of the credentials set" + const: "OAuth Credentials" + enum: + - "OAuth Credentials" + default: "OAuth Credentials" + order: 0 + client_id: + title: "Client ID" + description: "Hubspot client_id. See our docs if you need help finding this id." + type: "string" + examples: + - "123456789000" + client_secret: + title: "Client Secret" + description: "Hubspot client_secret. See our docs if you need help finding this secret." + type: "string" + examples: + - "secret" + airbyte_secret: true + refresh_token: + title: "Refresh token" + description: "Hubspot refresh_token. See our docs if you need help generating the token." + type: "string" + examples: + - "refresh_token" + airbyte_secret: true + - type: "object" + title: "API key" + required: + - "api_key" + - "credentials_title" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Name of the credentials set" + const: "API Key Credentials" + enum: + - "API Key Credentials" + default: "API Key Credentials" + order: 0 + api_key: + title: "API key" + description: "Hubspot API Key. See our docs if you need help finding this key." 
+ type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "refresh_token" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-db2:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/db2" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "IBM Db2 Source Spec" + type: "object" + required: + - "host" + - "port" + - "db" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host of the Db2." + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + db: + description: "Name of the database." + type: "string" + examples: + - "default" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-instagram:0.1.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/instagram" + changelogUrl: "https://docs.airbyte.io/integrations/sources/instagram" + connectionSpecification: + title: "Source Instagram" + type: "object" + properties: + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for User\ + \ Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after\ + \ this date will be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + format: "date-time" + access_token: + title: "Access Token" + description: "The value of the access token generated. See the docs for\ + \ more information" + airbyte_secret: true + type: "string" + required: + - "start_date" + - "access_token" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-intercom:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/intercom" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Intercom Spec" + type: "object" + required: + - "access_token" + - "start_date" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Intercom Access Token. See the docs for more information on how to obtain this key." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Intercom\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." 
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-iterable:0.1.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/iterable" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Iterable Spec" + type: "object" + required: + - "start_date" + - "api_key" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Iterable,\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated." + examples: + - "2021-04-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + api_key: + type: "string" + description: "Iterable API Key. See the docs for more information on how to obtain this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-jira:0.2.14" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/jira" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Jira Spec" + type: "object" + required: + - "api_token" + - "domain" + - "email" + additionalProperties: true + properties: + api_token: + type: "string" + description: "Jira API Token. See the docs for more information on how to generate this key." + airbyte_secret: true + domain: + type: "string" + examples: + - "domainname.atlassian.net" + pattern: "^[a-zA-Z0-9._-]*\\.atlassian\\.net$" + description: "Domain for your Jira account, e.g. airbyteio.atlassian.net" + email: + type: "string" + description: "The user email for your Jira account" + projects: + type: "array" + title: "Projects" + items: + type: "string" + examples: + - "PROJ1" + - "PROJ2" + description: "Comma-separated list of Jira project keys to replicate data\ + \ for" + start_date: + type: "string" + title: "Start Date" + description: "The date from which you'd like to replicate data for Jira\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated. Note that it will be used only in the following\ + \ incremental streams: issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + additional_fields: + type: "array" + title: "Additional Fields" + items: + type: "string" + description: "Comma-separated list of additional fields to include in replicating\ + \ issues" + examples: + - "Field A" + - "Field B" + expand_issue_changelog: + type: "boolean" + title: "Expand Issue Changelog" + description: "Expand the changelog when replicating issues" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-kafka:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/kafka" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Kafka Source Spec" + type: "object" + required: + - "bootstrap_servers" + - "subscription" + - "protocol" + additionalProperties: false + properties: + bootstrap_servers: + title: "Bootstrap servers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Kafka cluster. 
The client will make use of all servers\ + \ irrespective of which servers are specified here for bootstrapping—this\ + \ list only impacts the initial hosts used to discover the full set of\ + \ servers. This list should be in the form host1:port1,host2:port2,....\ + \ Since these servers are just used for the initial connection to discover\ + \ the full cluster membership (which may change dynamically), this list\ + \ need not contain the full set of servers (you may want more than one,\ + \ though, in case a server is down)." + type: "string" + examples: + - "kafka-broker1:9092,kafka-broker2:9092" + subscription: + title: "Subscribe method" + type: "object" + description: "You can choose to manually assign a list of partitions, or\ + \ subscribe to all topics matching specified pattern to get dynamically\ + \ assigned partitions" + oneOf: + - title: "Manually assign a list of partitions" + required: + - "subscription_type" + - "topic_partitions" + properties: + subscription_type: + description: "Manually assign a list of partitions to this consumer.\ + \ This interface does not allow for incremental assignment and will\ + \ replace the previous assignment (if there is one).\nIf the given\ + \ list of topic partitions is empty, it is treated the same as unsubscribe()." + type: "string" + const: "assign" + enum: + - "assign" + default: "assign" + topic_partitions: + title: "List of topic:partition pairs" + type: "string" + examples: + - "sample.topic:0, sample.topic:1" + - title: "Subscribe to all topics matching specified pattern" + required: + - "subscription_type" + - "topic_pattern" + properties: + subscription_type: + description: "Topic pattern from which the records will be read." + type: "string" + const: "subscribe" + enum: + - "subscribe" + default: "subscribe" + topic_pattern: + title: "Topic pattern" + type: "string" + examples: + - "sample.topic" + test_topic: + title: "Test topic" + description: "Topic to test if Airbyte can consume messages." + type: "string" + examples: + - "test.topic" + group_id: + title: "Group ID" + description: "Group id." + type: "string" + examples: + - "group.id" + max_poll_records: + title: "Max poll records" + description: "The maximum number of records returned in a single call to\ + \ poll(). Note, that max_poll_records does not impact the underlying fetching\ + \ behavior. The consumer will cache the records from each fetch request\ + \ and returns them incrementally from each poll." + type: "integer" + default: 500 + protocol: + title: "Protocol" + type: "object" + description: "Protocol used to communicate with brokers." + oneOf: + - title: "PLAINTEXT" + required: + - "security_protocol" + properties: + security_protocol: + type: "string" + enum: + - "PLAINTEXT" + default: "PLAINTEXT" + - title: "SASL PLAINTEXT" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_PLAINTEXT" + default: "SASL_PLAINTEXT" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "PLAIN" + enum: + - "PLAIN" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." 
+ type: "string" + default: "" + airbyte_secret: true + - title: "SASL SSL" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_SSL" + default: "SASL_SSL" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "GSSAPI" + enum: + - "GSSAPI" + - "OAUTHBEARER" + - "SCRAM-SHA-256" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + client_id: + title: "Client ID" + description: "An id string to pass to the server when making requests. The\ + \ purpose of this is to be able to track the source of requests beyond\ + \ just ip/port by allowing a logical application name to be included in\ + \ server-side request logging." + type: "string" + examples: + - "airbyte-consumer" + enable_auto_commit: + title: "Enable auto commit" + description: "If true the consumer's offset will be periodically committed\ + \ in the background." + type: "boolean" + default: true + auto_commit_interval_ms: + title: "Auto commit interval ms" + description: "The frequency in milliseconds that the consumer offsets are\ + \ auto-committed to Kafka if enable.auto.commit is set to true." + type: "integer" + default: 5000 + client_dns_lookup: + title: "Client DNS lookup" + description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ + \ connect to each returned IP address in sequence until a successful connection\ + \ is established. After a disconnection, the next IP is used. Once all\ + \ IPs have been used once, the client resolves the IP(s) from the hostname\ + \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ + \ bootstrap address into a list of canonical names. After the bootstrap\ + \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ + \ attempt to connect to the first IP address returned by the lookup, even\ + \ if the lookup returns multiple IP addresses." + type: "string" + default: "use_all_dns_ips" + enum: + - "default" + - "use_all_dns_ips" + - "resolve_canonical_bootstrap_servers_only" + retry_backoff_ms: + title: "Retry backoff ms" + description: "The amount of time to wait before attempting to retry a failed\ + \ request to a given topic partition. This avoids repeatedly sending requests\ + \ in a tight loop under some failure scenarios." + type: "integer" + default: 100 + request_timeout_ms: + title: "Request timeout ms" + description: "The configuration controls the maximum amount of time the\ + \ client will wait for the response of a request. If the response is not\ + \ received before the timeout elapses the client will resend the request\ + \ if necessary or fail the request if retries are exhausted." + type: "integer" + default: 30000 + receive_buffer_bytes: + title: "Receive buffer bytes" + description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ + \ reading data. If the value is -1, the OS default will be used." 
+ type: "integer" + default: 32768 + auto_offset_reset: + title: "Auto offset reset" + description: "What to do when there is no initial offset in Kafka or if\ + \ the current offset does not exist any more on the server - earliest:\ + \ automatically reset the offset to the earliest offset, latest: automatically\ + \ reset the offset to the latest offset, none: throw exception to the\ + \ consumer if no previous offset is found for the consumer's group, anything\ + \ else: throw exception to the consumer." + type: "string" + default: "latest" + enum: + - "latest" + - "earliest" + - "none" + repeated_calls: + title: "Repeated calls" + description: "The number of repeated calls to poll() if no messages were\ + \ received." + type: "integer" + default: 3 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + supported_source_sync_modes: + - "append" +- dockerImage: "airbyte/source-klaviyo:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" + changelogUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" + connectionSpecification: + title: "Klaviyo Spec" + type: "object" + properties: + api_key: + title: "Api Key" + description: "Klaviyo API Key. See our docs if you need help finding this key." + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + required: + - "api_key" + - "start_date" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-lever-hiring:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring" + changelogUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring#changelog" + connectionSpecification: + title: "Lever Hiring Spec" + type: "object" + properties: + client_id: + title: "Client Id" + description: "The client application id as provided when registering the\ + \ application with Lever." + type: "string" + client_secret: + title: "Client Secret" + description: "The application secret as provided when registering the application\ + \ with Lever." + airbyte_secret: true + type: "string" + refresh_token: + title: "Refresh Token" + description: "The refresh token your application will need to submit to\ + \ get a new access token after it's expired." + type: "string" + environment: + title: "Environment" + description: "Sandbox or Production environment." + default: "Production" + enum: + - "Sandbox" + - "Production" + type: "string" + start_date: + title: "Start Date" + description: "UTC date and time in the format 2019-02-25T00:00:00Z. Any\ + \ data before this date will not be replicated." 
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-04-25T00:00:00Z" + type: "string" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "refresh_token" + oauthFlowOutputParameters: [] +- dockerImage: "airbyte/source-linkedin-ads:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/linkedin-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Linkedin Ads Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + title: "Start Date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + description: "Date in the format 2020-09-17. Any data before this date will\ + \ not be replicated." + examples: + - "2021-05-17" + access_token: + type: "string" + title: "Access Token" + description: "The token value ganerated using Auth Code" + airbyte_secret: true + account_ids: + title: "Account IDs" + type: "array" + description: "Specify the Account IDs separated by space, from which to\ + \ pull the data. Leave empty to pull from all associated accounts." + items: + type: "integer" + default: [] + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-looker:0.2.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/looker" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Looker Spec" + type: "object" + required: + - "domain" + - "client_id" + - "client_secret" + additionalProperties: false + properties: + domain: + type: "string" + examples: + - "domainname.looker.com" + - "looker.clientname.com" + - "123.123.124.123:8000" + description: "Domain for your Looker account, e.g. airbyte.cloud.looker.com,looker.[clientname].com,IP\ + \ address" + client_id: + title: "Client ID" + type: "string" + description: "The Client ID is first part of an API3 key that is specific\ + \ to each Looker user. See the docs for more information on how to generate this key." + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret is second part of an API3 key." + run_look_ids: + title: "Look IDs to Run" + type: "array" + items: + type: "string" + pattern: "^[0-9]*$" + description: "The IDs of any Looks to run (optional)" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mailchimp:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mailchimp" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Mailchimp Spec" + type: "object" + required: + - "username" + - "apikey" + additionalProperties: false + properties: + username: + type: "string" + description: "The Username or email you use to sign into Mailchimp" + apikey: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." 
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-marketo:0.1.0"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/marketo"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "Source Marketo Spec"
+ type: "object"
+ required:
+ - "domain_url"
+ - "client_id"
+ - "client_secret"
+ - "start_date"
+ additionalProperties: false
+ properties:
+ domain_url:
+ type: "string"
+ description: "Your Marketo Base URL. See the docs for info on how to obtain this."
+ examples:
+ - "https://000-AAA-000.mktorest.com"
+ airbyte_secret: true
+ client_id:
+ type: "string"
+ description: "Your Marketo client_id. See the docs for info on how to obtain this."
+ airbyte_secret: true
+ client_secret:
+ type: "string"
+ description: "Your Marketo client secret. See the docs for info on how to obtain this."
+ airbyte_secret: true
+ start_date:
+ type: "string"
+ description: "Data generated in Marketo after this date will be replicated.\
+ \ This date must be specified in the format YYYY-MM-DDT00:00:00Z."
+ examples:
+ - "2020-09-25T00:00:00Z"
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$"
+ window_in_days:
+ type: "integer"
+ description: "The number of days for each data chunk, beginning from start_date.\
+ \ (Min=1, i.e. one day; Max=30, i.e. one month)."
+ examples:
+ - 1
+ - 5
+ - 10
+ - 15
+ - 30
+ default: 30
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-mssql:0.3.6"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/mssql"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "MSSQL Source Spec"
+ type: "object"
+ required:
+ - "host"
+ - "port"
+ - "database"
+ - "username"
+ additionalProperties: false
+ properties:
+ host:
+ description: "Hostname of the database."
+ type: "string"
+ port:
+ description: "Port of the database."
+ type: "integer"
+ minimum: 0
+ maximum: 65536
+ examples:
+ - "1433"
+ database:
+ description: "Name of the database."
+ type: "string"
+ examples:
+ - "master"
+ username:
+ description: "Username to use to access the database."
+ type: "string"
+ password:
+ description: "Password associated with the username."
+ type: "string"
+ airbyte_secret: true
+ ssl_method:
+ title: "SSL Method"
+ type: "object"
+ description: "Encryption method to use when communicating with the database"
+ order: 6
+ oneOf:
+ - title: "Unencrypted"
+ additionalProperties: false
+ description: "Data transfer will not be encrypted."
+ required:
+ - "ssl_method"
+ properties:
+ ssl_method:
+ type: "string"
+ const: "unencrypted"
+ enum:
+ - "unencrypted"
+ default: "unencrypted"
+ - title: "Encrypted (trust server certificate)"
+ additionalProperties: false
+ description: "Use the cert provided by the server without verification.\
+ \ (For testing purposes only!)"
+ required:
+ - "ssl_method"
+ properties:
+ ssl_method:
+ type: "string"
+ const: "encrypted_trust_server_certificate"
+ enum:
+ - "encrypted_trust_server_certificate"
+ default: "encrypted_trust_server_certificate"
+ - title: "Encrypted (verify certificate)"
+ additionalProperties: false
+ description: "Verify and use the cert provided by the server."
+ required: + - "ssl_method" + - "trustStoreName" + - "trustStorePassword" + properties: + ssl_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + hostNameInCertificate: + title: "Host Name In Certificate" + type: "string" + description: "Specifies the host name of the server. The value of\ + \ this property must match the subject property of the certificate." + order: 7 + replication_method: + type: "string" + title: "Replication Method" + description: "Replication method to use for extracting data from the database.\ + \ STANDARD replication requires no setup on the DB side but will not be\ + \ able to represent deletions incrementally. CDC uses {TBC} to detect\ + \ inserts, updates, and deletes. This needs to be configured on the source\ + \ database itself." + default: "STANDARD" + enum: + - "STANDARD" + - "CDC" + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-microsoft-teams:0.2.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/microsoft-teams" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Microsoft Teams Spec" + type: "object" + required: + - "tenant_id" + - "client_id" + - "client_secret" + - "period" + additionalProperties: false + properties: + tenant_id: + title: "Directory (tenant) ID" + type: "string" + description: "Directory (tenant) ID" + client_id: + title: "Application (client) ID" + type: "string" + description: "Application (client) ID" + client_secret: + title: "Client Secret" + type: "string" + description: "Client secret" + airbyte_secret: true + period: + type: "string" + description: "Specifies the length of time over which the Team Device Report\ + \ stream is aggregated. The supported values are: D7, D30, D90, and D180." + examples: + - "D7" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mixpanel:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mixpanel" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Mixpanel Spec" + type: "object" + required: + - "api_secret" + additionalProperties: true + properties: + api_secret: + type: "string" + description: "Mixpanel API Secret. See the docs for more information on how to obtain this key." + airbyte_secret: true + attribution_window: + type: "integer" + description: "Latency minimum number of days to look-back to account for\ + \ delays in attributing accurate results. Default attribution window is\ + \ 5 days." + default: 5 + date_window_size: + type: "integer" + description: "Number of days for date window looping through transactional\ + \ endpoints with from_date and to_date. Default date_window_size is 30\ + \ days. Clients with large volumes of events may want to decrease this\ + \ to 14, 7, or even down to 1-2 days." + default: 30 + project_timezone: + type: "string" + description: "Time zone in which integer date times are stored. The project\ + \ timezone may be found in the project settings in the Mixpanel console." + default: "US/Pacific" + examples: + - "US/Pacific" + - "UTC" + select_properties_by_default: + type: "boolean" + description: "Setting this config parameter to true ensures that new properties\ + \ on events and engage records are captured. Otherwise new properties\ + \ will be ignored" + default: true + start_date: + type: "string" + description: "The default value to use if no bookmark exists for an endpoint.\ + \ Default is 1 year ago." 
+ examples:
+ - "2021-11-16"
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)?$"
+ region:
+ type: "string"
+ enum:
+ - "US"
+ - "EU"
+ default: "US"
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-monday:0.1.0"
+ spec:
+ documentationUrl: "https://docsurl.com"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "Monday Spec"
+ type: "object"
+ required:
+ - "api_token"
+ additionalProperties: false
+ properties:
+ api_token:
+ type: "string"
+ description: "This is the API token to authenticate requests to Monday.\
+ \ Profile picture (bottom left) => Admin => API"
+ airbyte_secret: true
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-mongodb-v2:0.1.3"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
+ changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "MongoDb Source Spec"
+ type: "object"
+ required:
+ - "database"
+ additionalProperties: true
+ properties:
+ instance_type:
+ type: "object"
+ title: "MongoDb instance type"
+ description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\
+ \ Set, a TLS connection is used by default."
+ order: 0
+ oneOf:
+ - title: "Standalone MongoDb Instance"
+ required:
+ - "instance"
+ - "host"
+ - "port"
+ properties:
+ instance:
+ type: "string"
+ enum:
+ - "standalone"
+ default: "standalone"
+ host:
+ title: "Host"
+ type: "string"
+ description: "Host of a Mongo database to be replicated."
+ order: 0
+ port:
+ title: "Port"
+ type: "integer"
+ description: "Port of a Mongo database to be replicated."
+ minimum: 0
+ maximum: 65536
+ default: 27017
+ examples:
+ - "27017"
+ order: 1
+ tls:
+ title: "TLS connection"
+ type: "boolean"
+ description: "Indicates whether the TLS encryption protocol will be used\
+ \ to connect to MongoDB. It is recommended to use a TLS connection\
+ \ if possible. For more information see documentation."
+ default: false
+ order: 2
+ - title: "Replica Set"
+ required:
+ - "instance"
+ - "server_addresses"
+ properties:
+ instance:
+ type: "string"
+ enum:
+ - "replica"
+ default: "replica"
+ server_addresses:
+ title: "Server addresses"
+ type: "string"
+ description: "The members of a replica set. Please specify `host`:`port`\
+ \ of each member separated by commas."
+ examples:
+ - "host1:27017,host2:27017,host3:27017"
+ order: 0
+ replica_set:
+ title: "Replica Set"
+ type: "string"
+ description: "A replica set name."
+ order: 1
+ - title: "MongoDB Atlas"
+ additionalProperties: false
+ required:
+ - "instance"
+ - "cluster_url"
+ properties:
+ instance:
+ type: "string"
+ enum:
+ - "atlas"
+ default: "atlas"
+ cluster_url:
+ title: "Cluster URL"
+ type: "string"
+ description: "URL of a cluster to connect to."
+ order: 0
+ database:
+ title: "Database name"
+ type: "string"
+ description: "Database to be replicated."
+ order: 1 + user: + title: "User" + type: "string" + description: "User" + order: 2 + password: + title: "Password" + type: "string" + description: "Password" + airbyte_secret: true + order: 3 + auth_source: + title: "Authentication source" + type: "string" + description: "Authentication source where user information is stored" + default: "admin" + examples: + - "admin" + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mysql:0.4.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mysql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MySql Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "replication_method" + additionalProperties: false + properties: + host: + description: "Hostname of the database." + type: "string" + order: 0 + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 3306 + examples: + - "3306" + order: 1 + database: + description: "Name of the database." + type: "string" + order: 2 + username: + description: "Username to use to access the database." + type: "string" + order: 3 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + jdbc_url_params: + description: "Additional properties to pass to the jdbc url string when\ + \ connecting to the database formatted as 'key=value' pairs separated\ + \ by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)" + type: "string" + order: 5 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + order: 6 + replication_method: + type: "string" + title: "Replication Method" + description: "Replication method to use for extracting data from the database.\ + \ STANDARD replication requires no setup on the DB side but will not be\ + \ able to represent deletions incrementally. CDC uses the Binlog to detect\ + \ inserts, updates, and deletes. This needs to be configured on the source\ + \ database itself." + order: 7 + default: "STANDARD" + enum: + - "STANDARD" + - "CDC" + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." 
+ type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-okta:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/okta" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Okta Spec" + type: "object" + required: + - "token" + - "base_url" + additionalProperties: false + properties: + token: + type: "string" + title: "API Token" + description: "A Okta token. See the docs for instructions on how to generate it." + airbyte_secret: true + base_url: + type: "string" + title: "Base URL" + description: "The Okta base URL." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-onesignal:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/onesignal" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "OneSignal Source Spec" + type: "object" + required: + - "user_auth_key" + - "start_date" + - "outcome_names" + additionalProperties: false + properties: + user_auth_key: + type: "string" + description: "OneSignal User Auth Key, see the docs for more information on how to obtain this key." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for OneSignal\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + outcome_names: + type: "string" + description: "Comma-separated list of names and the value (sum/count) for\ + \ the returned outcome data. 
See the docs for more details" + examples: + - "os__session_duration.count,os__click.count,CustomOutcomeName.sum" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-oracle:0.3.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/oracle" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Oracle Source Spec" + type: "object" + required: + - "host" + - "port" + - "sid" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + port: + title: "Port" + description: "Port of the database.\nOracle Corporations recommends the\ + \ following port numbers:\n1521 - Default listening port for client connections\ + \ to the listener. \n2484 - Recommended and officially registered listening\ + \ port for client connections to the listener using TCP/IP with SSL" + type: "integer" + minimum: 0 + maximum: 65536 + default: 1521 + sid: + title: "SID (Oracle System Identifier)" + type: "string" + username: + title: "User" + description: "Username to use to access the database." + type: "string" + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + schemas: + title: "Schemas" + description: "List of schemas to sync from. Defaults to user. Case sensitive." + type: "array" + items: + type: "string" + minItems: 1 + uniqueItems: true + encryption: + title: "Encryption" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Native Network Ecryption (NNE)" + additionalProperties: false + description: "Native network encryption gives you the ability to encrypt\ + \ database connections, without the configuration overhead of TCP/IP\ + \ and SSL/TLS and without the need to open and listen on different ports." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "client_nne" + enum: + - "client_nne" + default: "client_nne" + encryption_algorithm: + type: "string" + description: "This parameter defines the encryption algorithm to be\ + \ used" + title: "Encryption Algorithm" + default: "AES256" + enum: + - "AES256" + - "RC4_56" + - "3DES168" + - title: "TLS Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "encryption_method" + - "ssl_certificate" + properties: + encryption_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + ssl_certificate: + title: "SSL PEM file" + description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ + \ containers frequently used in certificate installations" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." 
+ oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-paypal-transaction:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/paypal-transactions" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Paypal Transaction Search" + type: "object" + required: + - "client_id" + - "secret" + - "start_date" + - "is_sandbox" + additionalProperties: true + properties: + client_id: + title: "Client ID" + type: "string" + description: "The Paypal Client ID for API credentials" + secret: + title: "Secret" + type: "string" + description: "The Secret for a given Client ID." + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + description: "Start Date for data extraction in ISO format. 
Date must be in range from 3 years till 12 hrs before\ + \ present time" + examples: + - "2021-06-11T23:59:59-00:00" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}$" + is_sandbox: + title: "Is Sandbox" + description: "Whether or not to Sandbox or Production environment to extract\ + \ data from" + type: "boolean" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-paystack:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/paystack" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Paystack Source Spec" + type: "object" + required: + - "secret_key" + - "start_date" + additionalProperties: false + properties: + secret_key: + type: "string" + pattern: "^(s|r)k_(live|test)_[a-zA-Z0-9]+$" + description: "Paystack API key (usually starts with 'sk_live_'; find yours\ + \ here)." + airbyte_secret: true + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + lookback_window_days: + type: "integer" + title: "Lookback Window (in days)" + default: 0 + minimum: 0 + description: "When set, the connector will always reload data from the past\ + \ N days, where N is the value set here. This is useful if your data is\ + \ updated after creation." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pipedrive:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pipedrive" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pipedrive Spec" + type: "object" + required: + - "replication_start_date" + additionalProperties: true + properties: + authorization: + type: "object" + title: "Authentication Type" + oneOf: + - title: "Sign in via Pipedrive (OAuth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "API Key Authentication" + required: + - "auth_type" + - "api_token" + properties: + auth_type: + type: "string" + const: "Token" + enum: + - "Token" + default: "Token" + order: 0 + api_token: + title: "API Token" + type: "string" + description: "Pipedrive API Token" + airbyte_secret: true + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated. 
When specified and not\ + \ None, then stream will behave as incremental" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-plaid:0.2.1" + spec: + documentationUrl: "https://plaid.com/docs/api/" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + type: "object" + required: + - "access_token" + - "api_key" + - "client_id" + additionalProperties: false + properties: + access_token: + type: "string" + title: "Access Token" + description: "The end-user's Link access token." + api_key: + title: "API Key" + type: "string" + description: "The Plaid API key to use to hit the API." + airbyte_secret: true + client_id: + title: "Client ID" + type: "string" + description: "The Plaid client id" + plaid_env: + title: "Plaid Environment" + type: "string" + enum: + - "sandbox" + - "development" + - "production" + description: "The Plaid environment" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pokeapi:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pokeapi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pokeapi Spec" + type: "object" + required: + - "pokemon_name" + additionalProperties: false + properties: + pokemon_name: + type: "string" + description: "Pokemon requested from the API." + pattern: "^[a-z0-9_\\-]+$" + examples: + - "ditto, luxray, snorlax" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-posthog:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/posthog" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "PostHog Spec" + type: "object" + required: + - "api_key" + - "start_date" + additionalProperties: false + properties: + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-01-01T00:00:00Z" + api_key: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." + base_url: + type: "string" + default: "https://app.posthog.com" + description: "Base PostHog url. Defaults to PostHog Cloud (https://app.posthog.com)." + examples: + - "https://posthog.example.com" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-postgres:0.3.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Postgres Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." 
+ type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "Connect using SSL" + description: "Encrypt client/server communications for increased security." + type: "boolean" + default: false + order: 5 + replication_method: + type: "object" + title: "Replication Method" + description: "Replication method to use for extracting data from the database." + order: 6 + oneOf: + - title: "Standard" + additionalProperties: false + description: "Standard replication requires no setup on the DB side but\ + \ will not be able to represent deletions incrementally." + required: + - "method" + properties: + method: + type: "string" + const: "Standard" + enum: + - "Standard" + default: "Standard" + order: 0 + - title: "Logical Replication (CDC)" + additionalProperties: false + description: "Logical replication uses the Postgres write-ahead log (WAL)\ + \ to detect inserts, updates, and deletes. This needs to be configured\ + \ on the source database itself. Only available on Postgres 10 and above.\ + \ Read the Postgres Source docs for more information." + required: + - "method" + - "replication_slot" + - "publication" + properties: + method: + type: "string" + const: "CDC" + enum: + - "CDC" + default: "CDC" + order: 0 + plugin: + type: "string" + description: "A logical decoding plug-in installed on the PostgreSQL\ + \ server. `pgoutput` plug-in is used by default.\nIf replication\ + \ table contains a lot of big jsonb values it is recommended to\ + \ use `wal2json` plug-in. For more information about `wal2json`\ + \ plug-in read Postgres Source docs." + enum: + - "pgoutput" + - "wal2json" + default: "pgoutput" + order: 1 + replication_slot: + type: "string" + description: "A plug-in logical replication slot." + order: 2 + publication: + type: "string" + description: "A Postgres publication used for consuming changes." + order: 3 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." 
+ type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-prestashop:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "PrestaShop Spec" + type: "object" + required: + - "url" + - "access_key" + additionalProperties: false + properties: + url: + type: "string" + description: "Shop URL without trailing slash (domain name or IP address)" + access_key: + type: "string" + description: "Your PrestaShop access key. See the docs for info on how to obtain this." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-quickbooks-singer:0.1.3" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Quickbooks Singer Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "realm_id" + - "user_agent" + - "start_date" + - "sandbox" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Identifies which app is making the request. Obtain this value\ + \ from the Keys tab on the app profile via My Apps on the developer site.\ + \ There are two versions of this key: development and production" + client_secret: + description: " Obtain this value from the Keys tab on the app profile via\ + \ My Apps on the developer site. There are two versions of this key: development\ + \ and production" + type: "string" + airbyte_secret: true + refresh_token: + description: "A token used when refreshing the access token." + type: "string" + airbyte_secret: true + realm_id: + description: "Labeled Company ID. The Make API Calls panel is populated\ + \ with the realm id and the current access token" + type: "string" + airbyte_secret: true + user_agent: + type: "string" + description: "Process and email for API logging purposes. 
Example: tap-quickbooks\ + \ " + start_date: + description: "The default value to use if no bookmark exists for an endpoint\ + \ (rfc3339 date string) E.g, 2021-03-20T00:00:00Z" + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-03-20T00:00:00Z" + sandbox: + description: "Development or Production." + type: "boolean" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-recharge:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/recharge" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Recharge Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Recharge\ + \ API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-05-14T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + access_token: + type: "string" + description: "The value of the Access Token generated. See the docs for more\ + \ information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-recurly:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/recurly" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Recurly Source Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Recurly API Key. See the docs for more information on how to generate this key." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-redshift:0.3.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Redshift Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ + \ region and end with .redshift.amazonaws.com)" + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5439 + examples: + - "5439" + database: + description: "Name of the database." + type: "string" + examples: + - "master" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." 
+ type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-retently:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Retently Api Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "API key from https://app.retently.com/settings/api/tokens" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-s3:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" + changelogUrl: "https://docs.airbyte.io/integrations/sources/s3" + connectionSpecification: + title: "S3 Source Spec" + type: "object" + properties: + dataset: + title: "Dataset" + description: "This source creates one table per connection, this field is\ + \ the name of that table. This should include only letters, numbers, dash\ + \ and underscores. Note that this may be altered according to destination." + pattern: "^([A-Za-z0-9-_]+)$" + type: "string" + path_pattern: + title: "Path Pattern" + description: "Add at least 1 pattern here to match filepaths against. Use\ + \ | to separate multiple patterns. Airbyte uses these patterns to determine\ + \ which files to pick up from the provider storage. See wcmatch.glob to understand pattern syntax (GLOBSTAR\ + \ and SPLIT flags are enabled). Use pattern ** to pick\ + \ up all files." + examples: + - "**" + - "myFolder/myTableFiles/*.csv|myFolder/myOtherTableFiles/*.csv" + type: "string" + schema: + title: "Schema" + description: "Optionally provide a schema to enforce, as a valid JSON string.\ + \ Ensure this is a mapping of { \"column\" : \"type\" },\ + \ where types are valid JSON Schema datatypes. Leave as {} to auto-infer\ + \ the schema." + default: "{}" + examples: + - "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"\ + array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}" + type: "string" + format: + title: "Format" + default: "csv" + type: "object" + oneOf: + - title: "csv" + description: "This connector utilises PyArrow (Apache Arrow) for CSV parsing." + type: "object" + properties: + filetype: + title: "Filetype" + const: "csv" + type: "string" + delimiter: + title: "Delimiter" + description: "The character delimiting individual cells in the CSV\ + \ data. This may only be a 1-character string." + default: "," + minLength: 1 + type: "string" + quote_char: + title: "Quote Char" + description: "The character used optionally for quoting CSV values.\ + \ To disallow quoting, make this field blank." + default: "\"" + type: "string" + escape_char: + title: "Escape Char" + description: "The character used optionally for escaping special characters.\ + \ To disallow escaping, leave this field blank." + type: "string" + encoding: + title: "Encoding" + description: "The character encoding of the CSV data. Leave blank\ + \ to default to UTF-8. See list of python encodings for allowable options." + type: "string" + double_quote: + title: "Double Quote" + description: "Whether two quotes in a quoted CSV value denote a single\ + \ quote in the data." + default: true + type: "boolean" + newlines_in_values: + title: "Newlines In Values" + description: "Whether newline characters are allowed in CSV values.\ + \ Turning this on may affect performance. Leave blank to default\ + \ to False." 
+ default: false + type: "boolean" + block_size: + title: "Block Size" + description: "The chunk size in bytes to process at a time in memory\ + \ from each file. If your data is particularly wide and failing\ + \ during schema detection, increasing this should solve it. Beware\ + \ of raising this too high as you could hit OOM errors." + default: 10000 + type: "integer" + additional_reader_options: + title: "Additional Reader Options" + description: "Optionally add a valid JSON string here to provide additional\ + \ options to the csv reader. Mappings must correspond to options\ + \ detailed here. 'column_types' is used internally\ + \ to handle schema so overriding that would likely cause problems." + default: "{}" + examples: + - "{\"timestamp_parsers\": [\"%m/%d/%Y %H:%M\", \"%Y/%m/%d %H:%M\"\ + ], \"strings_can_be_null\": true, \"null_values\": [\"NA\", \"NULL\"\ + ]}" + type: "string" + advanced_options: + title: "Advanced Options" + description: "Optionally add a valid JSON string here to provide additional\ + \ Pyarrow ReadOptions. Specify 'column_names'\ + \ here if your CSV doesn't have header, or if you want to use custom\ + \ column names. 'block_size' and 'encoding' are already used above,\ + \ specify them again here will override the values above." + default: "{}" + examples: + - "{\"column_names\": [\"column1\", \"column2\"]}" + type: "string" + - title: "parquet" + description: "This connector utilises PyArrow (Apache Arrow) for Parquet parsing." + type: "object" + properties: + filetype: + title: "Filetype" + const: "parquet" + type: "string" + buffer_size: + title: "Buffer Size" + description: "Perform read buffering when deserializing individual\ + \ column chunks. By default every group column will be loaded fully\ + \ to memory. This option can help to optimize a work with memory\ + \ if your data is particularly wide or failing during detection\ + \ of OOM errors." + default: 0 + type: "integer" + columns: + title: "Columns" + description: "If you only want to sync a subset of the columns from\ + \ the file(s), add the columns you want here. Leave it empty to\ + \ sync all columns." + type: "array" + items: + type: "string" + batch_size: + title: "Batch Size" + description: "Maximum number of records per batch. Batches may be\ + \ smaller if there aren’t enough rows in the file. This option can\ + \ help to optimize a work with memory if your data is particularly\ + \ wide or failing during detection of OOM errors." + default: 65536 + type: "integer" + provider: + title: "S3: Amazon Web Services" + type: "object" + properties: + bucket: + title: "Bucket" + description: "Name of the S3 bucket where the file(s) exist." + type: "string" + aws_access_key_id: + title: "Aws Access Key Id" + description: "In order to access private Buckets stored on AWS S3, this\ + \ connector requires credentials with the proper permissions. If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + type: "string" + aws_secret_access_key: + title: "Aws Secret Access Key" + description: "In order to access private Buckets stored on AWS S3, this\ + \ connector requires credentials with the proper permissions. If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + type: "string" + path_prefix: + title: "Path Prefix" + description: "By providing a path-like prefix (e.g. myFolder/thisTable/)\ + \ under which all the relevant files sit, we can optimise finding\ + \ these in S3. 
This is optional but recommended if your bucket contains\ + \ many folders/files." + default: "" + type: "string" + endpoint: + title: "Endpoint" + description: "Endpoint to an S3 compatible service. Leave empty to use\ + \ AWS." + default: "" + type: "string" + use_ssl: + title: "Use Ssl" + description: "Is remote server using secure SSL/TLS connection" + type: "boolean" + verify_ssl_cert: + title: "Verify Ssl Cert" + description: "Allow self signed certificates" + type: "boolean" + required: + - "bucket" + required: + - "dataset" + - "path_pattern" + - "provider" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-salesloft:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/salesloft" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Salesloft Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Salesloft client id." + client_secret: + type: "string" + description: "Salesloft client secret." + airbyte_secret: true + refresh_token: + type: "string" + description: "Salesloft refresh token." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Salesloft\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-salesforce:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Salesforce Source Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + - "api_type" + additionalProperties: false + properties: + client_id: + description: "The Consumer Key that can be found when viewing your app in\ + \ Salesforce" + type: "string" + client_secret: + description: "The Consumer Secret that can be found when viewing your app\ + \ in Salesforce" + type: "string" + airbyte_secret: true + refresh_token: + description: "Salesforce Refresh Token used for Airbyte to access your Salesforce\ + \ account. If you don't know what this is, follow this guide to retrieve it." + type: "string" + airbyte_secret: true + start_date: + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated. Priority for filtering\ + \ by `updated` fields, and only then by `created` fields if they are available\ + \ for stream." + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-07-25T00:00:00Z" + is_sandbox: + description: "Whether or not the the app is in a Salesforce sandbox. If\ + \ you do not know what this, assume it is false. We provide more info\ + \ on this field in the docs." + type: "boolean" + default: false + api_type: + description: "Unless you know that you are transferring a very small amount\ + \ of data, prefer using the BULK API. This will help avoid using up all\ + \ of your API call quota with Salesforce. 
Valid values are BULK or REST." + type: "string" + enum: + - "BULK" + - "REST" + default: "BULK" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-sendgrid:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/sendgrid" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Sendgrid Spec" + type: "object" + required: + - "apikey" + additionalProperties: false + properties: + apikey: + type: "string" + description: "API Key, use admin to generate this key." + start_time: + type: "integer" + description: "Start time in timestamp integer format. Any data before this\ + \ timestamp will not be replicated." + examples: + - 1558359837 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-shopify:0.1.22" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Shopify Source CDK Specifications" + type: "object" + required: + - "shop" + - "start_date" + - "auth_method" + additionalProperties: false + properties: + shop: + type: "string" + description: "The name of the shopify store. For https://EXAMPLE.myshopify.com,\ + \ the shop name is 'EXAMPLE'." + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + auth_method: + title: "Shopify Authorization Method" + type: "object" + oneOf: + - type: "object" + title: "OAuth2.0" + required: + - "client_id" + - "client_secret" + - "access_token" + properties: + auth_method: + type: "string" + const: "access_token" + enum: + - "access_token" + default: "access_token" + order: 0 + client_id: + type: "string" + description: "The API Key of the Shopify developer application." + airbyte_secret: true + client_secret: + type: "string" + description: "The API Secret the Shopify developer application." + airbyte_secret: true + access_token: + type: "string" + description: "Access Token for making authenticated requests." + airbyte_secret: true + - title: "API Password" + type: "object" + required: + - "api_password" + properties: + auth_method: + type: "string" + const: "api_password" + enum: + - "api_password" + default: "api_password" + order: 0 + api_password: + type: "string" + description: "The API PASSWORD for your private application in `Shopify`\ + \ shop." 
+ airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "auth_method" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-shortio:0.1.0" + spec: + documentationUrl: "https://developers.short.io/reference" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Shortio Spec" + type: "object" + required: + - "domain_id" + - "secret_key" + - "start_date" + additionalProperties: false + properties: + domain_id: + type: "string" + description: "Domain ID" + airbyte_secret: false + secret_key: + type: "string" + description: "Short.io Secret key" + airbyte_secret: true + start_date: + type: "string" + description: "Start Date, YYYY-MM-DD" + airbyte_secret: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-slack:0.1.12" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/slack" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Slack Spec" + type: "object" + required: + - "start_date" + - "lookback_window" + - "join_channels" + additionalProperties: true + properties: + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + title: "Start Date" + lookback_window: + type: "integer" + title: "Threads Lookback window (Days)" + description: "How far into the past to look for messages in threads." + examples: + - 7 + - 14 + join_channels: + type: "boolean" + default: true + title: "Join all channels" + description: "Whether to join all channels or to sync data only from channels\ + \ the bot is already in. If false, you'll need to manually add the bot\ + \ to all the channels from which you'd like to sync messages. " + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate into Slack" + type: "object" + oneOf: + - type: "object" + title: "Sign in via Slack (OAuth)" + required: + - "access_token" + - "client_id" + - "client_secret" + - "option_title" + properties: + option_title: + type: "string" + const: "Default OAuth2.0 authorization" + client_id: + title: "Client ID" + description: "Slack client_id. See our docs if you need help finding this id." + type: "string" + examples: + - "slack-client-id-example" + client_secret: + title: "Client Secret" + description: "Slack client_secret. See our docs if you need help finding this secret." + type: "string" + examples: + - "slack-client-secret-example" + airbyte_secret: true + access_token: + title: "Access token" + description: "Slack access_token. See our docs if you need help generating the token." + type: "string" + examples: + - "slack-access-token-example" + airbyte_secret: true + refresh_token: + title: "Refresh token" + description: "Slack refresh_token. See our docs if you need help generating the token." 
+ type: "string" + examples: + - "slack-refresh-token-example" + airbyte_secret: true + order: 0 + - type: "object" + title: "API Token" + required: + - "api_token" + - "option_title" + properties: + option_title: + type: "string" + const: "API Token Credentials" + api_token: + type: "string" + title: "API Token" + description: "A Slack bot token. See the docs for instructions on how to generate it." + airbyte_secret: true + order: 1 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-smartsheets:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/smartsheets" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Smartsheets Source Spec" + type: "object" + required: + - "access_token" + - "spreadsheet_id" + additionalProperties: false + properties: + access_token: + title: "API Access token" + description: "Found in Profile > Apps & Integrations > API Access within\ + \ Smartsheet app" + type: "string" + airbyte_secret: true + spreadsheet_id: + title: "Smartsheet ID" + description: "Found in File > Properties" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-snapchat-marketing:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/snapchat-marketing" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snapchat Marketing Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + additionalProperties: false + properties: + client_id: + title: "Client ID" + type: "string" + description: "The Snapchat Client ID for API credentials." + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret for a given Client ID." + airbyte_secret: true + refresh_token: + title: "API Refresh Token" + type: "string" + description: "Refresh Token to get next api key after expiration. Is given\ + \ with API Key" + airbyte_secret: true + start_date: + title: "Start Date" + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-snowflake:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snowflake Source Spec" + type: "object" + required: + - "host" + - "role" + - "warehouse" + - "database" + - "schema" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host domain of the snowflake instance (must include the account,\ + \ region, cloud environment, and end with snowflakecomputing.com)." + examples: + - "accountname.us-east-2.aws.snowflakecomputing.com" + type: "string" + title: "Account name" + order: 0 + role: + description: "The role you created for Airbyte to access Snowflake." 
+ examples: + - "AIRBYTE_ROLE" + type: "string" + title: "Role" + order: 1 + warehouse: + description: "The warehouse you created for Airbyte to access data into." + examples: + - "AIRBYTE_WAREHOUSE" + type: "string" + title: "Warehouse" + order: 2 + database: + description: "The database you created for Airbyte to access data into." + examples: + - "AIRBYTE_DATABASE" + type: "string" + title: "Database" + order: 3 + schema: + description: "The source Snowflake schema tables." + examples: + - "AIRBYTE_SCHEMA" + type: "string" + title: "Schema" + order: 4 + username: + description: "The username you created to allow Airbyte to access the database." + examples: + - "AIRBYTE_USER" + type: "string" + title: "Username" + order: 5 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + order: 6 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-square:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/square" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Square Source CDK Specifications" + type: "object" + required: + - "api_key" + - "is_sandbox" + additionalProperties: false + properties: + api_key: + type: "string" + description: "The API key for a Square application" + airbyte_secret: true + is_sandbox: + type: "boolean" + description: "Determines the sandbox (true) or production (false) API version" + examples: + - true + - false + default: true + start_date: + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + include_deleted_objects: + type: "boolean" + description: "In some streams there is and option to include deleted objects\ + \ (Items, Categories, Discounts, Taxes)" + examples: + - true + - false + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-strava:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Strava Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "athlete_id" + - "start_date" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Strava Client ID" + pattern: "^[0-9_\\-]+$" + examples: + - "12345" + client_secret: + type: "string" + description: "Strava Client Secret" + pattern: "^[0-9a-fA-F]+$" + examples: + - "fc6243f283e51f6ca989aab298b17da125496f50" + airbyte_secret: true + refresh_token: + type: "string" + description: "Strava Refresh Token with activity:read_all permissions" + pattern: "^[0-9a-fA-F]+$" + examples: + - "fc6243f283e51f6ca989aab298b17da125496f50" + airbyte_secret: true + athlete_id: + type: "integer" + description: "Strava Athlete ID" + pattern: "^[0-9_\\-]+$" + examples: + - "17831421" + start_date: + type: "string" + description: "Start Query Timestamp in UTC" + examples: + - "2016-12-31 23:59:59" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-stripe:0.1.22" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Stripe Source 
Spec" + type: "object" + required: + - "client_secret" + - "account_id" + - "start_date" + additionalProperties: false + properties: + client_secret: + type: "string" + pattern: "^(s|r)k_(live|test)_[a-zA-Z0-9]+$" + description: "Stripe API key (usually starts with 'sk_live_'; find yours\ + \ here)." + airbyte_secret: true + account_id: + type: "string" + description: "Your Stripe account ID (starts with 'acct_', find yours here)." + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + lookback_window_days: + type: "integer" + title: "Lookback Window (in days)" + default: 0 + minimum: 0 + description: "When set, the connector will always reload data from the past\ + \ N days, where N is the value set here. This is useful if your data is\ + \ updated after creation." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-surveymonkey:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/surveymonkey" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "SurveyMonkey Spec" + type: "object" + required: + - "start_date" + additionalProperties: true + properties: + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z?$" + examples: + - "2021-01-01T00:00:00Z" + access_token: + title: "Access Token" + type: "string" + airbyte_secret: true + description: "API Token. See the docs for information on how to generate this key." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-tempo:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Tempo Spec" + type: "object" + required: + - "api_token" + additionalProperties: false + properties: + api_token: + type: "string" + description: "Tempo API Token." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-tiktok-marketing:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" + changelogUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" + connectionSpecification: + title: "TikTok Marketing Source Spec" + type: "object" + properties: + environment: + title: "Environment" + default: "Production" + oneOf: + - title: "Production" + type: "object" + properties: + environment: + title: "Environment" + const: "prod" + type: "string" + app_id: + title: "App Id" + description: "The App id applied by the developer." + type: "string" + secret: + title: "Secret" + description: "The private key of the developer's application." 
+ airbyte_secret: true + type: "string" + required: + - "app_id" + - "secret" + - title: "Sandbox" + type: "object" + properties: + environment: + title: "Environment" + const: "sandbox" + type: "string" + advertiser_id: + title: "Advertiser Id" + description: "The Advertiser ID which generated for the developer's\ + \ Sandbox application." + type: "string" + required: + - "advertiser_id" + type: "object" + access_token: + title: "Access Token" + description: "Long-term Authorized Access Token." + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "Start Date in format: YYYY-MM-DD." + default: "01-09-2016" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + type: "string" + required: + - "access_token" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-trello:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/trello" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Trello Spec" + type: "object" + required: + - "token" + - "key" + - "start_date" + additionalProperties: true + properties: + token: + type: "string" + title: "API token" + description: "A Trello token. See the docs for instructions on how to generate it." + airbyte_secret: true + key: + type: "string" + title: "API key" + description: "A Trello token. See the docs for instructions on how to generate it." + airbyte_secret: true + start_date: + type: "string" + title: "Start date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-03-01T00:00:00.000Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "token" + - - "key" +- dockerImage: "airbyte/source-twilio:0.1.1" + spec: + documentationUrl: "https://hub.docker.com/r/airbyte/source-twilio" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Twilio Spec" + type: "object" + required: + - "account_sid" + - "auth_token" + - "start_date" + additionalProperties: false + properties: + account_sid: + title: "Account ID" + description: "Twilio account SID" + airbyte_secret: true + type: "string" + auth_token: + title: "Auth Token" + description: "Twilio Auth Token." + airbyte_secret: true + type: "string" + start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2020-10-01T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-typeform:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/typeform" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Typeform Spec" + type: "object" + required: + - "token" + - "start_date" + additionalProperties: true + properties: + start_date: + type: "string" + description: "The date you would like to replicate data. 
Format: YYYY-MM-DDTHH:mm:ss[Z]." + examples: + - "2020-01-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + token: + type: "string" + description: "The API Token for a Typeform account." + airbyte_secret: true + form_ids: + title: "Form IDs to replicate" + description: "When this parameter is set, the connector will replicate data\ + \ only from the input forms. Otherwise, all forms in your Typeform account\ + \ will be replicated. You can find form IDs in your form URLs. For example,\ + \ in the URL \"https://mysite.typeform.com/to/u6nXL7\" the form_id is\ + \ u6nXL7. You can find form URLs on Share panel" + type: "array" + items: + type: "string" + uniqueItems: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-us-census:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/us-census" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "https://api.census.gov/ Source Spec" + type: "object" + required: + - "api_key" + - "query_path" + additionalProperties: false + properties: + query_params: + type: "string" + description: "The query parameters portion of the GET request, without the\ + \ api key" + pattern: "^\\w+=[\\w,:*]+(&(?!key)\\w+=[\\w,:*]+)*$" + examples: + - "get=NAME,NAICS2017_LABEL,LFO_LABEL,EMPSZES_LABEL,ESTAB,PAYANN,PAYQTR1,EMP&for=us:*&NAICS2017=72&LFO=001&EMPSZES=001" + - "get=MOVEDIN,GEOID1,GEOID2,MOVEDOUT,FULL1_NAME,FULL2_NAME,MOVEDNET&for=county:*" + query_path: + type: "string" + description: "The path portion of the GET request" + pattern: "^data(\\/[\\w\\d]+)+$" + examples: + - "data/2019/cbp" + - "data/2018/acs" + - "data/timeseries/healthins/sahie" + api_key: + type: "string" + description: "Your API Key. Get your key here." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-chat:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-chat" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Chat Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Chat API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-02-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + access_token: + type: "string" + description: "The value of the Access Token generated. See the docs for\ + \ more information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-sunshine:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk_sunshine" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Sunshine Spec" + type: "object" + required: + - "api_token" + - "email" + - "start_date" + - "subdomain" + additionalProperties: false + properties: + api_token: + type: "string" + airbyte_secret: true + description: "API Token. See the docs for information on how to generate this key." 
+ email: + type: "string" + description: "The user email for your Zendesk account" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Account" + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: "2021-01-01T00:00:00.000000Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-support:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-support" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Zendesk Support Spec" + type: "object" + required: + - "start_date" + - "subdomain" + - "auth_method" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Support API, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ after this date will be replicated." + examples: + - "2020-10-15T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Support" + auth_method: + title: "ZenDesk Authorization Method" + type: "object" + default: "api_token" + description: "Zendesk service provides 2 auth method: API token and oAuth2.\ + \ Now only the first one is available. Another one will be added in the\ + \ future" + oneOf: + - title: "API Token" + type: "object" + required: + - "email" + - "api_token" + additionalProperties: false + properties: + auth_method: + type: "string" + const: "api_token" + email: + type: "string" + description: "The user email for your Zendesk account" + api_token: + type: "string" + description: "The value of the API token generated. See the docs\ + \ for more information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-talk:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-talk" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Talk Spec" + type: "object" + required: + - "start_date" + - "subdomain" + - "access_token" + - "email" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Talk API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-04-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Talk" + access_token: + type: "string" + description: "The value of the API token generated. 
See the docs for more information" + airbyte_secret: true + email: + type: "string" + description: "The user email for your Zendesk account" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-sentry:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/sentry" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Sentry Spec" + type: "object" + required: + - "auth_token" + - "organization" + - "project" + additionalProperties: false + properties: + auth_token: + type: "string" + title: "Authentication tokens" + description: "Log into Sentry and then create authentication tokens. For self-hosted, you can find or create\ + \ authentication tokens by visiting \"{instance_url_prefix}/settings/account/api/auth-tokens/\"" + airbyte_secret: true + hostname: + type: "string" + title: "Host Name" + description: "Host name of Sentry API server. For self-hosted, specify your\ + \ host name here. Otherwise, leave it empty." + default: "sentry.io" + organization: + type: "string" + title: "Organization" + description: "The slug of the organization the groups belong to." + project: + type: "string" + title: "Project" + description: "The slug of the project the groups belong to." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zoom-singer:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zoom" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Zoom Singer Spec" + type: "object" + required: + - "jwt" + additionalProperties: false + properties: + jwt: + title: "JWT Token" + type: "string" + description: "Zoom JWT Token. See the docs for more information on how to obtain this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zuora:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zuora" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zuora Connector Configuration" + type: "object" + required: + - "start_date" + - "tenant_endpoint" + - "data_query" + - "client_id" + - "client_secret" + properties: + start_date: + type: "string" + title: "Start Date" + description: "Start Date in format: YYYY-MM-DD" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + window_in_days: + type: "string" + title: "Query Window (in days)" + description: "The amount of days for each data-chunk beginning from start_date.\ + \ The bigger the value, the faster the fetch. (0.1 - a couple of hours,\ + \ 1 - a day; 364 - a year)."
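+          # A fractional value is allowed; for example "0.5" fetches data in half-day (12 hour) chunks.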
+ examples: + - "0.5" + - "1" + - "30" + - "60" + - "90" + - "120" + - "200" + - "364" + pattern: "^(0|[1-9]\\d*)(\\.\\d+)?$" + default: "90" + tenant_endpoint: + title: "Tenant Endpoint Location" + type: "string" + description: "Please choose the right endpoint where your Tenant is located.\ + \ More info by this Link" + enum: + - "US Production" + - "US Cloud Production" + - "US API Sandbox" + - "US Cloud API Sandbox" + - "US Central Sandbox" + - "US Performance Test" + - "EU Production" + - "EU API Sandbox" + - "EU Central Sandbox" + data_query: + title: "Data Query Type" + type: "string" + description: "Choose between `Live`, or `Unlimited` - the optimized, replicated\ + \ database at 12 hours freshness for high volume extraction Link" + enum: + - "Live" + - "Unlimited" + default: "Live" + client_id: + type: "string" + title: "Client ID" + description: "Your OAuth user Client ID" + airbyte_secret: true + client_secret: + type: "string" + title: "Client Secret" + description: "Your OAuth user Client Secret" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java b/airbyte-config/init/src/test/java/io/airbyte/config/init/YamlSeedConfigPersistenceTest.java similarity index 86% rename from airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java rename to airbyte-config/init/src/test/java/io/airbyte/config/init/YamlSeedConfigPersistenceTest.java index 8a740ba535688..57090570f3e21 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java +++ b/airbyte-config/init/src/test/java/io/airbyte/config/init/YamlSeedConfigPersistenceTest.java @@ -2,7 +2,7 @@ * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.config.persistence; +package io.airbyte.config.init; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -14,7 +14,9 @@ import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardSync; import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.persistence.ConfigNotFoundException; import java.io.IOException; +import java.net.URI; import java.util.Collections; import java.util.Map; import java.util.stream.Stream; @@ -41,6 +43,7 @@ public void testGetConfig() throws Exception { assertEquals("airbyte/source-mysql", mysqlSource.getDockerRepository()); assertEquals("https://docs.airbyte.io/integrations/sources/mysql", mysqlSource.getDocumentationUrl()); assertEquals("mysql.svg", mysqlSource.getIcon()); + assertEquals(URI.create("https://docs.airbyte.io/integrations/sources/mysql"), mysqlSource.getSpec().getDocumentationUrl()); // destination final String s3DestinationId = "4816b78f-1489-44c1-9060-4b19d5fa9362"; @@ -50,13 +53,16 @@ public void testGetConfig() throws Exception { assertEquals("S3", s3Destination.getName()); assertEquals("airbyte/destination-s3", s3Destination.getDockerRepository()); assertEquals("https://docs.airbyte.io/integrations/destinations/s3", s3Destination.getDocumentationUrl()); + assertEquals(URI.create("https://docs.airbyte.io/integrations/destinations/s3"), s3Destination.getSpec().getDocumentationUrl()); } @Test public void testGetInvalidConfig() { - assertThrows(UnsupportedOperationException.class, + assertThrows( + UnsupportedOperationException.class, () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SYNC, "invalid_id", StandardSync.class)); - assertThrows(ConfigNotFoundException.class, + assertThrows( + ConfigNotFoundException.class, () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "invalid_id", StandardWorkspace.class)); } diff --git a/airbyte-config/models/build.gradle b/airbyte-config/models/build.gradle index 271b3fe685e7e..d62c88c7e1633 100644 --- a/airbyte-config/models/build.gradle +++ b/airbyte-config/models/build.gradle @@ -7,10 +7,11 @@ plugins { dependencies { implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') + implementation project(':airbyte-commons') } jsonSchema2Pojo { - sourceType = SourceType.YAMLSCHEMA + sourceType = SourceType.YAMLSCHEMA source = files("${sourceSets.main.output.resourcesDir}/types") targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index 99b1fc3709159..6d3804ad9b20f 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -5,6 +5,7 @@ package io.airbyte.config; import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.helpers.LogConfigs; import java.nio.file.Path; import java.util.List; import java.util.Map; @@ -96,6 +97,8 @@ public interface Configs { String getMemoryLimit(); // Logging + LogConfigs getLogConfigs(); + String getS3LogBucket(); String getS3LogBucketRegion(); diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index cb61f7710eb8b..3950fab5d432b 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ 
b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -9,6 +9,8 @@ import com.google.common.base.Strings; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.helpers.LogConfiguration; import java.nio.file.Path; import java.util.Arrays; import java.util.HashSet; @@ -93,6 +95,7 @@ public class EnvConfigs implements Configs { public static final String DEFAULT_NETWORK = "host"; private final Function getEnv; + private LogConfiguration logConfiguration; public EnvConfigs() { this(System::getenv); @@ -100,6 +103,14 @@ public EnvConfigs() { EnvConfigs(final Function getEnv) { this.getEnv = getEnv; + this.logConfiguration = new LogConfiguration( + getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET, ""), + getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET_REGION, ""), + getEnvOrDefault(LogClientSingleton.AWS_ACCESS_KEY_ID, ""), + getEnvOrDefault(LogClientSingleton.AWS_SECRET_ACCESS_KEY, ""), + getEnvOrDefault(LogClientSingleton.S3_MINIO_ENDPOINT, ""), + getEnvOrDefault(LogClientSingleton.GCP_STORAGE_BUCKET, ""), + getEnvOrDefault(LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, "")); } @Override @@ -289,9 +300,11 @@ public String getJobImagePullPolicy() { /** * Returns worker pod tolerations parsed from its own environment variable. The value of the env is * a string that represents one or more tolerations. + *
   *
   * <li>Tolerations are separated by a `;`
   * <li>Each toleration contains k=v pairs mentioning some/all of key, effect, operator and value and
   * separated by `,`
+   *
* For example:- The following represents two tolerations, one checking existence and another * matching a value @@ -399,37 +412,41 @@ public String getJobsImagePullSecret() { @Override public String getS3LogBucket() { - return getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET, ""); + return logConfiguration.getS3LogBucket(); } @Override public String getS3LogBucketRegion() { - return getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET_REGION, ""); + return logConfiguration.getS3LogBucketRegion(); } @Override public String getAwsAccessKey() { - return getEnvOrDefault(LogClientSingleton.AWS_ACCESS_KEY_ID, ""); + return logConfiguration.getAwsAccessKey(); } @Override public String getAwsSecretAccessKey() { - return getEnvOrDefault(LogClientSingleton.AWS_SECRET_ACCESS_KEY, ""); + return logConfiguration.getAwsSecretAccessKey(); } @Override public String getS3MinioEndpoint() { - return getEnvOrDefault(LogClientSingleton.S3_MINIO_ENDPOINT, ""); + return logConfiguration.getS3MinioEndpoint(); } @Override public String getGcpStorageBucket() { - return getEnvOrDefault(LogClientSingleton.GCP_STORAGE_BUCKET, ""); + return logConfiguration.getGcpStorageBucket(); } @Override public String getGoogleApplicationCredentials() { - return getEnvOrDefault(LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, ""); + return logConfiguration.getGoogleApplicationCredentials(); + } + + public LogConfigs getLogConfigs() { + return logConfiguration; } @Override @@ -443,7 +460,7 @@ public SecretPersistenceType getSecretPersistenceType() { return SecretPersistenceType.valueOf(secretPersistenceStr); } - private String getEnvOrDefault(final String key, final String defaultValue) { + protected String getEnvOrDefault(final String key, final String defaultValue) { return getEnvOrDefault(key, defaultValue, Function.identity(), false); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java index e248982892d28..5a0787366d46a 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java @@ -135,7 +135,7 @@ public static void main(final String[] args) throws IOException { blob.downloadTo(os); } os.close(); - final var data = new GcsLogs().tailCloudLog(new LogConfigDelegator(new EnvConfigs()), "tail", 6); + final var data = new GcsLogs().tailCloudLog((new EnvConfigs()).getLogConfigs(), "tail", 6); System.out.println(data); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java index 11924e72e31a0..c393017121d39 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java @@ -8,7 +8,6 @@ import io.airbyte.commons.io.IOs; import io.airbyte.config.Configs; import io.airbyte.config.Configs.WorkerEnvironment; -import io.airbyte.config.EnvConfigs; import java.io.File; import java.io.IOException; import java.nio.file.Path; @@ -19,11 +18,10 @@ import org.slf4j.LoggerFactory; import org.slf4j.MDC; -// todo (cgardens) - make this an actual singleton so we can write tests and mock the components. /** * Airbyte's logging layer entrypoint. Handles logs written to local disk as well as logs written to * cloud storages. - * + *

* Although the configuration is passed in as {@link Configs}, it is transformed to * {@link LogConfigs} within this class. Beyond this class, all configuration consumption is via the * {@link LogConfigs} interface via the {@link CloudLogs} interface. @@ -31,51 +29,56 @@ public class LogClientSingleton { private static final Logger LOGGER = LoggerFactory.getLogger(LogClientSingleton.class); + private static LogClientSingleton instance; @VisibleForTesting - static final int LOG_TAIL_SIZE = 1000000; + final static int LOG_TAIL_SIZE = 1000000; @VisibleForTesting - static CloudLogs logClient; + CloudLogs logClient; // Any changes to the following values must also be propagated to the log4j2.xml in main/resources. - public static String WORKSPACE_MDC_KEY = "workspace_app_root"; - public static String CLOUD_WORKSPACE_MDC_KEY = "cloud_workspace_app_root"; + public static final String WORKSPACE_MDC_KEY = "workspace_app_root"; + public static final String CLOUD_WORKSPACE_MDC_KEY = "cloud_workspace_app_root"; - public static String JOB_LOG_PATH_MDC_KEY = "job_log_path"; - public static String CLOUD_JOB_LOG_PATH_MDC_KEY = "cloud_job_log_path"; + public static final String JOB_LOG_PATH_MDC_KEY = "job_log_path"; + public static final String CLOUD_JOB_LOG_PATH_MDC_KEY = "cloud_job_log_path"; // S3/Minio - public static String S3_LOG_BUCKET = "S3_LOG_BUCKET"; - public static String S3_LOG_BUCKET_REGION = "S3_LOG_BUCKET_REGION"; - public static String AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"; - public static String AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"; - public static String S3_MINIO_ENDPOINT = "S3_MINIO_ENDPOINT"; + public static final String S3_LOG_BUCKET = "S3_LOG_BUCKET"; + public static final String S3_LOG_BUCKET_REGION = "S3_LOG_BUCKET_REGION"; + public static final String AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"; + public static final String AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"; + public static final String S3_MINIO_ENDPOINT = "S3_MINIO_ENDPOINT"; // GCS - public static String GCP_STORAGE_BUCKET = "GCP_STORAGE_BUCKET"; - public static String GOOGLE_APPLICATION_CREDENTIALS = "GOOGLE_APPLICATION_CREDENTIALS"; + public static final String GCP_STORAGE_BUCKET = "GCP_STORAGE_BUCKET"; + public static final String GOOGLE_APPLICATION_CREDENTIALS = "GOOGLE_APPLICATION_CREDENTIALS"; - public static int DEFAULT_PAGE_SIZE = 1000; - public static String LOG_FILENAME = "logs.log"; - public static String APP_LOGGING_CLOUD_PREFIX = "app-logging"; - public static String JOB_LOGGING_CLOUD_PREFIX = "job-logging"; + public static final int DEFAULT_PAGE_SIZE = 1000; + public static final String LOG_FILENAME = "logs.log"; + public static final String APP_LOGGING_CLOUD_PREFIX = "app-logging"; + public static final String JOB_LOGGING_CLOUD_PREFIX = "job-logging"; - public static Path getServerLogsRoot(final Configs configs) { - return configs.getWorkspaceRoot().resolve("server/logs"); + public static synchronized LogClientSingleton getInstance() { + if (instance == null) { + instance = new LogClientSingleton(); + } + return instance; } - public static Path getSchedulerLogsRoot(final Configs configs) { - return configs.getWorkspaceRoot().resolve("scheduler/logs"); + public Path getServerLogsRoot(final Path workspaceRoot) { + return workspaceRoot.resolve("server/logs"); } - public static File getServerLogFile(final Configs configs) { - final var logPathBase = getServerLogsRoot(configs); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { - return logPathBase.resolve(LOG_FILENAME).toFile(); - } + 
public Path getSchedulerLogsRoot(final Path workspaceRoot) { + return workspaceRoot.resolve("scheduler/logs"); + } - final var logConfigs = new LogConfigDelegator(configs); - final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + logPathBase; + public File getServerLogFile(final Path workspaceRoot, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + if (shouldUseLocalLogs(workerEnvironment)) { + return getServerLogsRoot(workspaceRoot).resolve(LOG_FILENAME).toFile(); + } + final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + getServerLogsRoot(workspaceRoot); try { return logClient.downloadCloudLog(logConfigs, cloudLogPath); } catch (final IOException e) { @@ -83,14 +86,12 @@ public static File getServerLogFile(final Configs configs) { } } - public static File getSchedulerLogFile(final Configs configs) { - final var logPathBase = getSchedulerLogsRoot(configs); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { - return logPathBase.resolve(LOG_FILENAME).toFile(); + public File getSchedulerLogFile(final Path workspaceRoot, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + if (shouldUseLocalLogs(workerEnvironment)) { + return getSchedulerLogsRoot(workspaceRoot).resolve(LOG_FILENAME).toFile(); } - final var logConfigs = new LogConfigDelegator(configs); - final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + logPathBase; + final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + getSchedulerLogsRoot(workspaceRoot); try { return logClient.downloadCloudLog(logConfigs, cloudLogPath); } catch (final IOException e) { @@ -98,16 +99,15 @@ public static File getSchedulerLogFile(final Configs configs) { } } - public static List getJobLogFile(final Configs configs, final Path logPath) throws IOException { + public List getJobLogFile(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final Path logPath) throws IOException { if (logPath == null || logPath.equals(Path.of(""))) { return Collections.emptyList(); } - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + if (shouldUseLocalLogs(workerEnvironment)) { return IOs.getTail(LOG_TAIL_SIZE, logPath); } - final var logConfigs = new LogConfigDelegator(configs); final var cloudLogPath = JOB_LOGGING_CLOUD_PREFIX + logPath; return logClient.tailCloudLog(logConfigs, cloudLogPath, LOG_TAIL_SIZE); } @@ -116,52 +116,47 @@ public static List getJobLogFile(final Configs configs, final Path logPa * Primarily to clean up logs after testing. Only valid for Kube logs. */ @VisibleForTesting - public static void deleteLogs(final Configs configs, final String logPath) { + public void deleteLogs(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final String logPath) { if (logPath == null || logPath.equals(Path.of(""))) { return; } - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + if (shouldUseLocalLogs(workerEnvironment)) { throw new NotImplementedException("Local log deletes not supported."); } - final var logConfigs = new LogConfigDelegator(configs); final var cloudLogPath = JOB_LOGGING_CLOUD_PREFIX + logPath; logClient.deleteLogs(logConfigs, cloudLogPath); } - public static void setJobMdc(final Path path) { - // setJobMdc is referenced from TemporalAttemptExecution without input parameters, so hard to pass - // this in. 
- final Configs configs = new EnvConfigs(); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + public void setJobMdc(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final Path path) { + if (shouldUseLocalLogs(workerEnvironment)) { LOGGER.debug("Setting docker job mdc"); MDC.put(LogClientSingleton.JOB_LOG_PATH_MDC_KEY, path.resolve(LogClientSingleton.LOG_FILENAME).toString()); } else { LOGGER.debug("Setting kube job mdc"); - final var logConfigs = new LogConfigDelegator(configs); createCloudClientIfNull(logConfigs); MDC.put(LogClientSingleton.CLOUD_JOB_LOG_PATH_MDC_KEY, path.resolve(LogClientSingleton.LOG_FILENAME).toString()); } } - public static void setWorkspaceMdc(final Path path) { - final var configs = new EnvConfigs(); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + public void setWorkspaceMdc(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final Path path) { + if (shouldUseLocalLogs(workerEnvironment)) { LOGGER.debug("Setting docker workspace mdc"); MDC.put(LogClientSingleton.WORKSPACE_MDC_KEY, path.toString()); } else { LOGGER.debug("Setting kube workspace mdc"); - final var logConfigs = new LogConfigDelegator(configs); createCloudClientIfNull(logConfigs); MDC.put(LogClientSingleton.CLOUD_WORKSPACE_MDC_KEY, path.toString()); } } + // This method should cease to exist here and become a property on the enum instead + // TODO handle this as part of refactor https://github.com/airbytehq/airbyte/issues/7545 private static boolean shouldUseLocalLogs(final WorkerEnvironment workerEnvironment) { return workerEnvironment.equals(WorkerEnvironment.DOCKER); } - private static void createCloudClientIfNull(final LogConfigs configs) { + private void createCloudClientIfNull(final LogConfigs configs) { if (logClient == null) { logClient = CloudLogs.createCloudLogClient(configs); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java deleted file mode 100644 index 18f194d283d18..0000000000000 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.config.helpers; - -import io.airbyte.config.Configs; - -/** - * Implements {@link LogConfigs} by delegating to a {@link Configs} implementation. Because the - * logging configuration overlaps with other configuration, this delegation is intended to avoid - * multiple configurations existing at once. 
- */ -public class LogConfigDelegator implements LogConfigs { - - private final Configs delegate; - - public LogConfigDelegator(final Configs configs) { - delegate = configs; - } - - @Override - public String getS3LogBucket() { - return delegate.getS3LogBucket(); - } - - @Override - public String getS3LogBucketRegion() { - return delegate.getS3LogBucketRegion(); - } - - @Override - public String getAwsAccessKey() { - return delegate.getAwsAccessKey(); - } - - @Override - public String getAwsSecretAccessKey() { - return delegate.getAwsSecretAccessKey(); - } - - @Override - public String getS3MinioEndpoint() { - return delegate.getS3MinioEndpoint(); - } - - @Override - public String getGcpStorageBucket() { - return delegate.getGcpStorageBucket(); - } - - @Override - public String getGoogleApplicationCredentials() { - return delegate.getGoogleApplicationCredentials(); - } - -} diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java index 5acaadd83bace..345e21a5cd045 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java @@ -6,8 +6,8 @@ /** * Configuration required to retrieve logs. This is a subset of the methods defined in - * {@link io.airbyte.config.Configs} so actual look up can be delegated in - * {@link LogConfigDelegator}. This prevents conflicting configuration existing at once. + * {@link io.airbyte.config.Configs} so actual look up can be delegated in {@link LogConfiguration}. + * This prevents conflicting configuration existing at once. */ public interface LogConfigs { diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java new file mode 100644 index 0000000000000..85903e2c4100c --- /dev/null +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.helpers; + +/** + * Implements {@link LogConfigs} with immutable values. Because the logging configuration overlaps + * with other configuration, this delegation is intended to avoid multiple configurations existing + * at once. 
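+ *
+ * The {@link #EMPTY} constant provides an instance in which every value is blank.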
+ */ +public class LogConfiguration implements LogConfigs { + + public final static LogConfiguration EMPTY = new LogConfiguration("", "", "", "", "", "", ""); + + private final String s3LogBucket; + private final String s3LogBucketRegion; + private final String awsAccessKey; + private final String awsSecretAccessKey; + private final String s3MinioEndpoint; + private final String gcpStorageBucket; + private final String googleApplicationCredentials; + + public LogConfiguration(final String s3LogBucket, + final String s3LogBucketRegion, + final String awsAccessKey, + final String awsSecretAccessKey, + final String s3MinioEndpoint, + final String gcpStorageBucket, + final String googleApplicationCredentials) { + this.s3LogBucket = s3LogBucket; + this.s3LogBucketRegion = s3LogBucketRegion; + this.awsAccessKey = awsAccessKey; + this.awsSecretAccessKey = awsSecretAccessKey; + this.s3MinioEndpoint = s3MinioEndpoint; + this.gcpStorageBucket = gcpStorageBucket; + this.googleApplicationCredentials = googleApplicationCredentials; + } + + @Override + public String getS3LogBucket() { + return s3LogBucket; + } + + @Override + public String getS3LogBucketRegion() { + return s3LogBucketRegion; + } + + @Override + public String getAwsAccessKey() { + return awsAccessKey; + } + + @Override + public String getAwsSecretAccessKey() { + return awsSecretAccessKey; + } + + @Override + public String getS3MinioEndpoint() { + return s3MinioEndpoint; + } + + @Override + public String getGcpStorageBucket() { + return gcpStorageBucket; + } + + @Override + public String getGoogleApplicationCredentials() { + return googleApplicationCredentials; + } + +} diff --git a/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml new file mode 100644 index 0000000000000..0d3becf8e74c6 --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml @@ -0,0 +1,16 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml +title: DockerImageSpec +description: docker image name and the connector specification associated with it +type: object +required: + - dockerImage + - spec +additionalProperties: false +properties: + dockerImage: + type: string + spec: + type: object + existingJavaType: io.airbyte.protocol.models.ConnectorSpecification diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java index f353c166716c0..b59774795442f 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java @@ -10,6 +10,7 @@ import static org.mockito.Mockito.when; import io.airbyte.config.EnvConfigs; +import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.util.ArrayList; @@ -36,8 +37,7 @@ public void testMissingConfiguration() { */ @Test public void testRetrieveAllLogs() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = GcsLogs.getFile(configs, "paginate", 6); + final File data = GcsLogs.getFile((new EnvConfigs()).getLogConfigs(), "paginate", 6); final var retrieved = new ArrayList(); Files.lines(data.toPath()).forEach(retrieved::add); @@ -56,8 +56,7 @@ public void testRetrieveAllLogs() throws IOException { */ 
@Test public void testTail() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = new GcsLogs().tailCloudLog(configs, "tail", 6); + final var data = new GcsLogs().tailCloudLog((new EnvConfigs()).getLogConfigs(), "tail", 6); final var expected = List.of("Line 4", "Line 5", "Line 6", "Line 7", "Line 8", "Line 9"); assertEquals(data, expected); diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java index 8d2477852e33b..726d235dcf40e 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java @@ -31,7 +31,8 @@ public class KubeLoggingConfigTest { public void cleanUpLogs() { if (logPath != null) { try { - LogClientSingleton.deleteLogs(new EnvConfigs(), logPath); + final EnvConfigs envConfigs = new EnvConfigs(); + LogClientSingleton.getInstance().deleteLogs(envConfigs.getWorkerEnvironment(), envConfigs.getLogConfigs(), logPath); } catch (final Exception e) { // Ignore Minio delete error. } @@ -47,9 +48,10 @@ public void cleanUpLogs() { */ @Test public void testLoggingConfiguration() throws IOException, InterruptedException { + final EnvConfigs envConfigs = new EnvConfigs(); final var randPath = Strings.addRandomSuffix("-", "", 5); // This mirrors our Log4j2 set up. See log4j2.xml. - LogClientSingleton.setJobMdc(Path.of(randPath)); + LogClientSingleton.getInstance().setJobMdc(envConfigs.getWorkerEnvironment(), envConfigs.getLogConfigs(), Path.of(randPath)); final var toLog = List.of("line 1", "line 2", "line 3"); for (final String l : toLog) { @@ -64,7 +66,7 @@ public void testLoggingConfiguration() throws IOException, InterruptedException logPath = randPath + "/logs.log/"; // The same env vars that log4j2 uses to determine where to publish to determine how to retrieve the // log file. - final var logs = LogClientSingleton.getJobLogFile(new EnvConfigs(), Path.of(logPath)); + final var logs = LogClientSingleton.getInstance().getJobLogFile(envConfigs.getWorkerEnvironment(), envConfigs.getLogConfigs(), Path.of(logPath)); // Each log line is of the form . Further, there might be // other log lines from the system running. Join all the lines to simplify assertions. 
final var logsLine = Strings.join(logs, " "); diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java index c1d60e8955e72..2d5de01af56b5 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java @@ -29,25 +29,28 @@ class LogClientSingletonTest { void setup() { configs = mock(Configs.class); mockLogClient = mock(CloudLogs.class); - LogClientSingleton.logClient = mockLogClient; + LogClientSingleton.getInstance().logClient = mockLogClient; } @Test void testGetJobLogFileK8s() throws IOException { when(configs.getWorkerEnvironment()).thenReturn(WorkerEnvironment.KUBERNETES); - assertEquals(Collections.emptyList(), LogClientSingleton.getJobLogFile(configs, Path.of("/job/1"))); + assertEquals(Collections.emptyList(), + LogClientSingleton.getInstance().getJobLogFile(configs.getWorkerEnvironment(), configs.getLogConfigs(), Path.of("/job/1"))); verify(mockLogClient).tailCloudLog(any(), eq("job-logging/job/1"), eq(LogClientSingleton.LOG_TAIL_SIZE)); } @Test void testGetJobLogFileNullPath() throws IOException { - assertEquals(Collections.emptyList(), LogClientSingleton.getJobLogFile(configs, null)); + assertEquals(Collections.emptyList(), + LogClientSingleton.getInstance().getJobLogFile(configs.getWorkerEnvironment(), configs.getLogConfigs(), null)); verifyNoInteractions(mockLogClient); } @Test void testGetJobLogFileEmptyPath() throws IOException { - assertEquals(Collections.emptyList(), LogClientSingleton.getJobLogFile(configs, Path.of(""))); + assertEquals(Collections.emptyList(), + LogClientSingleton.getInstance().getJobLogFile(configs.getWorkerEnvironment(), configs.getLogConfigs(), Path.of(""))); verifyNoInteractions(mockLogClient); } diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java index 74959a55be01f..07d47af19fdba 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java @@ -25,6 +25,8 @@ @Tag("logger-client") public class S3LogsTest { + private static final LogConfigs logConfigs = (new EnvConfigs()).getLogConfigs(); + @Test public void testMissingCredentials() { final var configs = mock(LogConfigs.class); @@ -41,8 +43,7 @@ public void testMissingCredentials() { */ @Test public void testRetrieveAllLogs() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = S3Logs.getFile(configs, "paginate", 6); + final var data = S3Logs.getFile(logConfigs, "paginate", 6); final var retrieved = new ArrayList(); Files.lines(data.toPath()).forEach(retrieved::add); @@ -61,9 +62,7 @@ public void testRetrieveAllLogs() throws IOException { */ @Test public void testTail() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = new S3Logs().tailCloudLog(configs, "tail", 6); - + final var data = new S3Logs().tailCloudLog(logConfigs, "tail", 6); final var expected = List.of("Line 4", "Line 5", "Line 6", "Line 7", "Line 8", "Line 9"); assertEquals(data, expected); } diff --git a/airbyte-config/persistence/build.gradle b/airbyte-config/persistence/build.gradle index 834f38596c98d..6b072911359db 100644 --- 
a/airbyte-config/persistence/build.gradle +++ b/airbyte-config/persistence/build.gradle @@ -11,7 +11,6 @@ dependencies { implementation project(':airbyte-db:jooq') implementation project(':airbyte-protocol:models') implementation project(':airbyte-config:models') - implementation project(':airbyte-config:init') implementation project(':airbyte-json-validation') implementation 'com.google.cloud:google-cloud-secretmanager:1.7.2' testImplementation "org.testcontainers:postgresql:1.15.3" diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java index d7d47242df735..40c5edb5286b4 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java @@ -461,7 +461,7 @@ public void updateConnectionState(final UUID connectionId, final State state) th /** * Converts between a dumpConfig() output and a replaceAllConfigs() input, by deserializing the - * string/jsonnode into the AirbyteConfig, Stream + * string/jsonnode into the AirbyteConfig, Stream<Object<AirbyteConfig.getClassName()>> * * @param configurations from dumpConfig() * @return input suitable for replaceAllConfigs() diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java index 19035ed42295d..2181bcb162640 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java @@ -366,7 +366,14 @@ ConnectorCounter updateConnectorDefinitions(final DSLContext ctx, final ConnectorInfo connectorInfo = connectorRepositoryToIdVersionMap.get(repository); final JsonNode currentDefinition = connectorInfo.definition; - final Set newFields = getNewFields(currentDefinition, latestDefinition); + + // todo (lmossman) - this logic to remove the "spec" field is temporary; it is necessary to avoid + // breaking users who are actively using an old connector version, otherwise specs from the most + // recent connector versions may be inserted into the db which could be incompatible with the + // version they are actually using. + // Once the faux major version bump has been merged, this "new field" logic will be removed + // entirely. + final Set newFields = Sets.difference(getNewFields(currentDefinition, latestDefinition), Set.of("spec")); // Process connector in use if (connectorRepositoriesInUse.contains(repository)) { diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java index 15c4c2b9aff47..68a3cd5626116 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java @@ -8,7 +8,7 @@ /** * Provides a read-only interface to a backing secrets store similar to {@link SecretPersistence}. - * In practice, the functionality should be provided by a {@link SecretPersistence#read function. 
+ * In practice, the functionality should be provided by a {@link SecretPersistence#read} function. */ @FunctionalInterface public interface ReadOnlySecretPersistence { diff --git a/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java b/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java index 21521f6881d64..e4dc563be31fd 100644 --- a/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java +++ b/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java @@ -9,6 +9,7 @@ import com.google.api.gax.rpc.NotFoundException; import com.google.cloud.secretmanager.v1.SecretName; +import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; import io.airbyte.config.persistence.split_secrets.GoogleSecretManagerPersistence; import io.airbyte.config.persistence.split_secrets.SecretCoordinate; @@ -26,10 +27,10 @@ public class GoogleSecretManagerPersistenceIntegrationTest { private GoogleSecretManagerPersistence persistence; private String baseCoordinate; + private final Configs configs = new EnvConfigs(); @BeforeEach void setUp() { - final var configs = new EnvConfigs(); persistence = GoogleSecretManagerPersistence.getEphemeral( configs.getSecretStoreGcpProjectId(), configs.getSecretStoreGcpCredentials()); @@ -38,7 +39,6 @@ void setUp() { @AfterEach void tearDown() throws IOException { - final var configs = new EnvConfigs(); try (final var client = GoogleSecretManagerPersistence.getSecretManagerServiceClient(configs.getSecretStoreGcpCredentials())) { // try to delete this so we aren't charged for the secret // if this is missed due to some sort of failure the secret will be deleted after the ttl diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java index 02091e35e0f0a..4dcbf918c9e6c 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java @@ -13,10 +13,12 @@ import io.airbyte.config.ConfigSchema; import io.airbyte.config.StandardDestinationDefinition; import io.airbyte.config.StandardSourceDefinition; +import io.airbyte.config.StandardSourceDefinition.SourceType; import io.airbyte.db.Database; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; import org.jooq.Record1; @@ -48,26 +50,34 @@ public static void dbDown() { container.close(); } - protected static final StandardSourceDefinition SOURCE_GITHUB; - protected static final StandardSourceDefinition SOURCE_POSTGRES; - protected static final StandardDestinationDefinition DESTINATION_SNOWFLAKE; - protected static final StandardDestinationDefinition DESTINATION_S3; - - static { - try { - final ConfigPersistence seedPersistence = YamlSeedConfigPersistence.getDefault(); - SOURCE_GITHUB = seedPersistence - .getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "ef69ef6e-aa7f-4af1-a01d-ef775033524e", StandardSourceDefinition.class); - SOURCE_POSTGRES = seedPersistence - 
.getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "decd338e-5647-4c0b-adf4-da0e75f5a750", StandardSourceDefinition.class); - DESTINATION_SNOWFLAKE = seedPersistence - .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "424892c4-daac-4491-b35d-c6688ba547ba", StandardDestinationDefinition.class); - DESTINATION_S3 = seedPersistence - .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "4816b78f-1489-44c1-9060-4b19d5fa9362", StandardDestinationDefinition.class); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } + protected static final StandardSourceDefinition SOURCE_GITHUB = new StandardSourceDefinition() + .withName("GitHub") + .withSourceDefinitionId(UUID.fromString("ef69ef6e-aa7f-4af1-a01d-ef775033524e")) + .withDockerRepository("airbyte/source-github") + .withDockerImageTag("0.2.3") + .withDocumentationUrl("https://docs.airbyte.io/integrations/sources/github") + .withIcon("github.svg") + .withSourceType(SourceType.API); + protected static final StandardSourceDefinition SOURCE_POSTGRES = new StandardSourceDefinition() + .withName("Postgres") + .withSourceDefinitionId(UUID.fromString("decd338e-5647-4c0b-adf4-da0e75f5a750")) + .withDockerRepository("airbyte/source-postgres") + .withDockerImageTag("0.3.11") + .withDocumentationUrl("https://docs.airbyte.io/integrations/sources/postgres") + .withIcon("postgresql.svg") + .withSourceType(SourceType.DATABASE); + protected static final StandardDestinationDefinition DESTINATION_SNOWFLAKE = new StandardDestinationDefinition() + .withName("Snowflake") + .withDestinationDefinitionId(UUID.fromString("424892c4-daac-4491-b35d-c6688ba547ba")) + .withDockerRepository("airbyte/destination-snowflake") + .withDockerImageTag("0.3.16") + .withDocumentationUrl("https://docs.airbyte.io/integrations/destinations/snowflake"); + protected static final StandardDestinationDefinition DESTINATION_S3 = new StandardDestinationDefinition() + .withName("S3") + .withDestinationDefinitionId(UUID.fromString("4816b78f-1489-44c1-9060-4b19d5fa9362")) + .withDockerRepository("airbyte/destination-s3") + .withDockerImageTag("0.1.12") + .withDocumentationUrl("https://docs.airbyte.io/integrations/destinations/s3"); protected static void writeSource(final ConfigPersistence configPersistence, final StandardSourceDefinition source) throws Exception { configPersistence.writeConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, source.getSourceDefinitionId().toString(), source); diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java index 1a94209294d6b..9177115f48865 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java @@ -13,6 +13,7 @@ import static org.mockito.Mockito.when; import com.google.common.collect.Lists; +import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; import io.airbyte.config.DestinationConnection; import io.airbyte.config.SourceConnection; @@ -80,14 +81,10 @@ public void testUpdateConfigsInNonEmptyDatabase() throws Exception { @DisplayName("When a connector is in use, its definition should not be updated") public void testNoUpdateForUsedConnector() throws Exception { // the seed has a newer version of s3 destination and github source - final 
StandardDestinationDefinition destinationS3V2 = YamlSeedConfigPersistence.getDefault() -        .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "4816b78f-1489-44c1-9060-4b19d5fa9362", StandardDestinationDefinition.class) -        .withDockerImageTag("10000.1.0"); +    final StandardDestinationDefinition destinationS3V2 = Jsons.clone(DESTINATION_S3).withDockerImageTag("10000.1.0"); when(seedPersistence.listConfigs(ConfigSchema.STANDARD_DESTINATION_DEFINITION, StandardDestinationDefinition.class)) .thenReturn(Collections.singletonList(destinationS3V2)); -    final StandardSourceDefinition sourceGithubV2 = YamlSeedConfigPersistence.getDefault() -        .getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "ef69ef6e-aa7f-4af1-a01d-ef775033524e", StandardSourceDefinition.class) -        .withDockerImageTag("10000.15.3"); +    final StandardSourceDefinition sourceGithubV2 = Jsons.clone(SOURCE_GITHUB).withDockerImageTag("10000.15.3"); when(seedPersistence.listConfigs(ConfigSchema.STANDARD_SOURCE_DEFINITION, StandardSourceDefinition.class)) .thenReturn(Collections.singletonList(sourceGithubV2)); @@ -112,9 +109,7 @@ public void testNoUpdateForUsedConnector() throws Exception { @DisplayName("When a connector is not in use, its definition should be updated") public void testUpdateForUnusedConnector() throws Exception { // the seed has a newer version of snowflake destination -    final StandardDestinationDefinition snowflakeV2 = YamlSeedConfigPersistence.getDefault() -        .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "424892c4-daac-4491-b35d-c6688ba547ba", StandardDestinationDefinition.class) -        .withDockerImageTag("10000.2.0"); +    final StandardDestinationDefinition snowflakeV2 = Jsons.clone(DESTINATION_SNOWFLAKE).withDockerImageTag("10000.2.0"); when(seedPersistence.listConfigs(ConfigSchema.STANDARD_DESTINATION_DEFINITION, StandardDestinationDefinition.class)) .thenReturn(Collections.singletonList(snowflakeV2)); diff --git a/airbyte-config/specs/README.md b/airbyte-config/specs/README.md new file mode 100644 index 0000000000000..8d043e1ec9729 --- /dev/null +++ b/airbyte-config/specs/README.md @@ -0,0 +1,16 @@ +# Generating Seed Connector Specs + +The catalog of seeded connector definitions is stored and manually updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` +files. These manually-maintained connector definitions intentionally _do not_ contain the connector specs, in an effort to keep these files +human-readable and easily editable, and because specs can be automatically fetched. + +This automatic fetching of connector specs is the goal of the SeedConnectorSpecGenerator. This class reads the connector definitions in +the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, fetches the corresponding specs from the GCS bucket cache, and writes the +specs to the `airbyte-config/init/src/main/resources/seed/*_specs.yaml` files. See the +[SeedConnectorSpecGenerator](src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java) class for more details. + +Therefore, whenever a connector definition is updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, the +SeedConnectorSpecGenerator should be re-run to generate the updated connector spec files. To do so, +run `./gradlew :airbyte-config:init:processResources`, or just build the platform project, and commit the changes to your PR.
If you do not do this, + the build in the CI will fail because there will be a diff in the generated files as you have not checked in the changes that were applied by the + generator. diff --git a/airbyte-config/specs/build.gradle b/airbyte-config/specs/build.gradle new file mode 100644 index 0000000000000..91d1fd0921706 --- /dev/null +++ b/airbyte-config/specs/build.gradle @@ -0,0 +1,24 @@ +plugins { + id 'java' +} + +dependencies { + implementation 'commons-cli:commons-cli:1.4' + + implementation project(':airbyte-commons') + implementation project(':airbyte-commons-cli') + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-json-validation') +} + +task generateSeedConnectorSpecs(type: JavaExec, dependsOn: compileJava) { + classpath = sourceSets.main.runtimeClasspath + + mainClass = 'io.airbyte.config.specs.SeedConnectorSpecGenerator' + + args '--seed-root' + args new File(project(":airbyte-config:init").projectDir, '/src/main/resources/seed') +} + +project(":airbyte-config:init").tasks.processResources.dependsOn(generateSeedConnectorSpecs) diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java new file mode 100644 index 0000000000000..832326c551c46 --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.api.client.util.Preconditions; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteProtocolSchema; +import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import io.airbyte.validation.json.JsonValidationException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class GcsBucketSpecFetcher { + + private static final Logger LOGGER = LoggerFactory.getLogger(GcsBucketSpecFetcher.class); + + private final Storage storage; + private final String bucketName; + + public GcsBucketSpecFetcher(final Storage storage, final String bucketName) { + this.storage = storage; + this.bucketName = bucketName; + } + + public String getBucketName() { + return bucketName; + } + + public Optional<ConnectorSpecification> attemptFetch(final String dockerImage) { + final String[] dockerImageComponents = dockerImage.split(":"); + Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalid docker image: " + dockerImage); + final String dockerImageName = dockerImageComponents[0]; + final String dockerImageTag = dockerImageComponents[1]; + + final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); + LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); + final Blob specAsBlob = storage.get(bucketName, specPath.toString()); + + // if null it means the object was not found.
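+    // (i.e. no spec has been published to the GCS cache for this docker image and tag)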
+ if (specAsBlob == null) { + LOGGER.debug("Spec not found in bucket storage"); + return Optional.empty(); + } + + final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); + try { + validateConfig(Jsons.deserialize(specAsString)); + } catch (final JsonValidationException e) { + LOGGER.error("Received invalid spec from bucket store. {}", e.toString()); + return Optional.empty(); + } + return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); + } + + private static void validateConfig(final JsonNode json) throws JsonValidationException { + final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); + final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); + jsonSchemaValidator.ensure(specJsonSchema, json); + } + +} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java new file mode 100644 index 0000000000000..980772ccf6ea1 --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.storage.StorageOptions; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.cli.Clis; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.commons.yaml.Yamls; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.EnvConfigs; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This script is responsible for ensuring that up-to-date {@link ConnectorSpecification}s for every + * connector definition in the seed are stored in a corresponding resource file, for the purpose of + * seeding the specs into the config database on server startup. See + * ./airbyte-config/specs/readme.md for more details on how this class is run and how it fits into + * the project. + *
<p>
+ * Specs are stored in a separate file from the definitions in an effort to keep the definitions + * yaml files human-readable and easily-editable, as specs can be rather large. + *
<p>
+ * Specs are fetched from the GCS spec cache bucket, so if any specs are missing from the bucket + * then this will fail. Note that this script only pulls specs from the bucket cache; it never + * pushes specs to the bucket. Since this script runs at build time, the decision was to depend on + * the bucket cache rather than running a docker container to fetch the spec during the build which + * could be slow and unwieldy. If there is a failure, check the bucket cache and figure out how to + * get the correct spec in there. + */ +public class SeedConnectorSpecGenerator { + + private static final String DOCKER_REPOSITORY_FIELD = "dockerRepository"; + private static final String DOCKER_IMAGE_TAG_FIELD = "dockerImageTag"; + private static final String DOCKER_IMAGE_FIELD = "dockerImage"; + private static final String SPEC_FIELD = "spec"; + private static final String SPEC_BUCKET_NAME = new EnvConfigs().getSpecCacheBucket(); + + private static final Logger LOGGER = LoggerFactory.getLogger(SeedConnectorSpecGenerator.class); + + private static final Option SEED_ROOT_OPTION = Option.builder("s").longOpt("seed-root").hasArg(true).required(true) + .desc("path to where seed resource files are stored").build(); + private static final Options OPTIONS = new Options().addOption(SEED_ROOT_OPTION); + + private final GcsBucketSpecFetcher bucketSpecFetcher; + + public SeedConnectorSpecGenerator(final GcsBucketSpecFetcher bucketSpecFetcher) { + this.bucketSpecFetcher = bucketSpecFetcher; + } + + public static void main(final String[] args) throws Exception { + final CommandLine parsed = Clis.parse(args, OPTIONS); + final Path outputRoot = Path.of(parsed.getOptionValue(SEED_ROOT_OPTION.getOpt())); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), SPEC_BUCKET_NAME); + final SeedConnectorSpecGenerator seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcher); + seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.SOURCE); + seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.DESTINATION); + } + + public void run(final Path seedRoot, final SeedConnectorType seedConnectorType) throws IOException { + LOGGER.info("Updating seeded {} definition specs if necessary...", seedConnectorType.name()); + + final JsonNode seedDefinitionsJson = yamlToJson(seedRoot, seedConnectorType.getDefinitionFileName()); + final JsonNode seedSpecsJson = yamlToJson(seedRoot, seedConnectorType.getSpecFileName()); + + final List updatedSeedSpecs = fetchUpdatedSeedSpecs(seedDefinitionsJson, seedSpecsJson); + + final String outputString = String.format("# This file is generated by %s.\n", this.getClass().getName()) + + "# Do NOT edit this file directly. 
See generator class for more details.\n" + + Yamls.serialize(updatedSeedSpecs); + final Path outputPath = IOs.writeFile(seedRoot.resolve(seedConnectorType.getSpecFileName()), outputString); + + LOGGER.info("Finished updating {}", outputPath); + } + + private JsonNode yamlToJson(final Path root, final String fileName) { + final String yamlString = IOs.readFile(root, fileName); + return Yamls.deserialize(yamlString); + } + + @VisibleForTesting + final List fetchUpdatedSeedSpecs(final JsonNode seedDefinitions, final JsonNode currentSeedSpecs) { + final List seedDefinitionsDockerImages = MoreIterators.toList(seedDefinitions.elements()) + .stream() + .map(json -> String.format("%s:%s", json.get(DOCKER_REPOSITORY_FIELD).asText(), json.get(DOCKER_IMAGE_TAG_FIELD).asText())) + .collect(Collectors.toList()); + + final Map currentSeedImageToSpec = MoreIterators.toList(currentSeedSpecs.elements()) + .stream() + .collect(Collectors.toMap( + json -> json.get(DOCKER_IMAGE_FIELD).asText(), + json -> new DockerImageSpec().withDockerImage(json.get(DOCKER_IMAGE_FIELD).asText()) + .withSpec(Jsons.object(json.get(SPEC_FIELD), ConnectorSpecification.class)))); + + return seedDefinitionsDockerImages + .stream() + .map(dockerImage -> currentSeedImageToSpec.containsKey(dockerImage) ? currentSeedImageToSpec.get(dockerImage) : fetchSpecFromGCS(dockerImage)) + .collect(Collectors.toList()); + } + + private DockerImageSpec fetchSpecFromGCS(final String dockerImage) { + LOGGER.info("Seeded spec not found for docker image {} - fetching from GCS bucket {}...", dockerImage, bucketSpecFetcher.getBucketName()); + final ConnectorSpecification spec = bucketSpecFetcher.attemptFetch(dockerImage) + .orElseThrow(() -> new RuntimeException(String.format( + "Failed to fetch valid spec file for docker image %s from GCS bucket %s. This will continue to fail until the connector change has been approved and published. See https://github.com/airbytehq/airbyte/tree/master/docs/connector-development#publishing-a-connector for more details.", + dockerImage, + bucketSpecFetcher.getBucketName()))); + return new DockerImageSpec().withDockerImage(dockerImage).withSpec(spec); + } + +} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java new file mode 100644 index 0000000000000..36d1326af215b --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.config.specs; + +public enum SeedConnectorType { + + SOURCE( + "source_definitions.yaml", + "source_specs.yaml"), + DESTINATION( + "destination_definitions.yaml", + "destination_specs.yaml"); + + private final String definitionFileName; + private final String specFileName; + + SeedConnectorType(final String definitionFileName, + final String specFileName) { + this.definitionFileName = definitionFileName; + this.specFileName = specFileName; + } + + public String getDefinitionFileName() { + return definitionFileName; + } + + public String getSpecFileName() { + return specFileName; + } + +} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java new file mode 100644 index 0000000000000..25e16bea545bf --- /dev/null +++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class GcsBucketSpecFetcherTest { + + private static final String BUCKET_NAME = "bucket"; + private static final String DOCKER_REPOSITORY = "image"; + private static final String DOCKER_IMAGE_TAG = "0.1.0"; + private static final String DOCKER_IMAGE = DOCKER_REPOSITORY + ":" + DOCKER_IMAGE_TAG; + private static final String SPEC_PATH = Path.of("specs").resolve(DOCKER_REPOSITORY).resolve(DOCKER_IMAGE_TAG).resolve("spec.json").toString(); + + private Storage storage; + private Blob specBlob; + private final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); + + @SuppressWarnings("unchecked") + @BeforeEach + void setup() throws IOException { + storage = mock(Storage.class); + + final byte[] specBytes = Jsons.toBytes(Jsons.jsonNode(spec)); + specBlob = mock(Blob.class); + when(specBlob.getContent()).thenReturn(specBytes); + } + + @Test + void testGetsSpecIfPresent() throws IOException { + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(specBlob); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isPresent()); + assertEquals(spec, returnedSpec.get()); + } + + @Test + void testReturnsEmptyIfNotPresent() throws IOException { + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(null); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isEmpty()); + } + + @Test + void testReturnsEmptyIfInvalidSpec() throws IOException { + final Blob invalidSpecBlob = mock(Blob.class); + 
when(invalidSpecBlob.getContent()).thenReturn("{\"notASpec\": true}".getBytes(StandardCharsets.UTF_8)); + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(invalidSpecBlob); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isEmpty()); + } + +} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java new file mode 100644 index 0000000000000..0925608a2f62f --- /dev/null +++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class SeedConnectorSpecGeneratorTest { + + private static final UUID DEF_ID1 = java.util.UUID.randomUUID(); + private static final UUID DEF_ID2 = java.util.UUID.randomUUID(); + private static final String CONNECTOR_NAME1 = "connector1"; + private static final String CONNECTOR_NAME2 = "connector2"; + private static final String DOCUMENTATION_URL = "https://wwww.example.com"; + private static final String DOCKER_REPOSITORY1 = "airbyte/connector1"; + private static final String DOCKER_REPOSITORY2 = "airbyte/connector2"; + private static final String DOCKER_TAG1 = "0.1.0"; + private static final String DOCKER_TAG2 = "0.2.0"; + private static final String BUCKET_NAME = "bucket"; + + private SeedConnectorSpecGenerator seedConnectorSpecGenerator; + private GcsBucketSpecFetcher bucketSpecFetcherMock; + + @BeforeEach + void setup() { + bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); + when(bucketSpecFetcherMock.getBucketName()).thenReturn(BUCKET_NAME); + + seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcherMock); + } + + @Test + void testMissingSpecIsFetched() { + final StandardDestinationDefinition sourceDefinition1 = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); + final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); + + final StandardDestinationDefinition sourceDefinition2 = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID2) + .withDockerRepository(DOCKER_REPOSITORY2) + .withDockerImageTag(DOCKER_TAG2) + 
.withName(CONNECTOR_NAME2) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); + + final JsonNode seedDefinitions = Jsons.jsonNode(Arrays.asList(sourceDefinition1, sourceDefinition2)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(dockerImageSpec1)); + + when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(spec2)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = Arrays.asList(dockerImageSpec1, dockerImageSpec2); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testOutdatedSpecIsFetched() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG2) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification outdatedSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of( + "foo1", + "bar1"))); + final DockerImageSpec outdatedDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1) + .withSpec(outdatedSpec); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(outdatedDockerImageSpec)); + + final ConnectorSpecification newSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec newDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2).withSpec(newSpec); + + when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(newSpec)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(newDockerImageSpec); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testExtraneousSpecIsRemoved() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); + final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); + + final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(Arrays.asList(dockerImageSpec1, dockerImageSpec2)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List 
expectedSeedSpecs = List.of(dockerImageSpec1); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testNoFetchIsPerformedIfAllSpecsUpToDate() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); + final DockerImageSpec dockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(dockerImageSpec)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(dockerImageSpec); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + verify(bucketSpecFetcherMock, never()).attemptFetch(any()); + } + +} diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java index 776356a264991..323edd576ccb9 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java @@ -33,6 +33,9 @@ public static Database createPostgresDatabaseWithRetry(final String username, final String jdbcConnectionString, final Function isDbReady) { Database database = null; + if (jdbcConnectionString == null || jdbcConnectionString.trim().equals("")) { + throw new IllegalArgumentException("Using a null or empty jdbc url will hang database creation; aborting."); + } while (database == null) { LOGGER.warn("Waiting for database to become available..."); diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java index 6dc841a1ce944..c627b7e82d659 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java @@ -17,10 +17,13 @@ /** * Custom database for jOOQ code generation. It performs the following operations: + *

+ * <ul>
+ * <li>Run Flyway migration.</li>
+ * <li>Dump the database schema.</li>
+ * <li>Create a connection for jOOQ code generation.</li>
+ * </ul>
+ * <p>

* Reference: https://github.com/sabomichal/jooq-meta-postgres-flyway */ public abstract class FlywayMigrationDatabase extends PostgresDatabase { diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java index 79567bb34c5c5..ce1e8f1debe69 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java @@ -9,7 +9,6 @@ import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; -import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; import io.airbyte.config.StandardSyncState; import io.airbyte.config.State; @@ -51,15 +50,24 @@ public class V0_30_22_001__Store_last_sync_state extends BaseJavaMigration { static final Field COLUMN_CREATED_AT = DSL.field("created_at", SQLDataType.TIMESTAMPWITHTIMEZONE); static final Field COLUMN_UPDATED_AT = DSL.field("updated_at", SQLDataType.TIMESTAMPWITHTIMEZONE); - private final Configs configs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; public V0_30_22_001__Store_last_sync_state() { - this.configs = new EnvConfigs(); + // EnvConfigs left in place for migration purposes as FlyWay prevents injection, but isolated to + // local scope. + final EnvConfigs configs = new EnvConfigs(); + this.databaseUser = configs.getDatabaseUser(); + this.databasePassword = configs.getDatabasePassword(); + this.databaseUrl = configs.getDatabaseUrl(); } @VisibleForTesting - V0_30_22_001__Store_last_sync_state(final Configs configs) { - this.configs = configs; + V0_30_22_001__Store_last_sync_state(final String databaseUser, final String databasePassword, final String databaseUrl) { + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; } @Override @@ -67,7 +75,7 @@ public void migrate(final Context context) throws Exception { LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); final DSLContext ctx = DSL.using(context.getConnection()); - final Optional jobsDatabase = getJobsDatabase(configs); + final Optional jobsDatabase = getJobsDatabase(databaseUser, databasePassword, databaseUrl); if (jobsDatabase.isPresent()) { copyData(ctx, getStandardSyncStates(jobsDatabase.get()), OffsetDateTime.now()); } @@ -100,16 +108,15 @@ static void copyData(final DSLContext ctx, final Set standard * data from the job database). */ @VisibleForTesting - static Optional getJobsDatabase(final Configs configs) { + static Optional getJobsDatabase(final String databaseUser, final String databasePassword, final String databaseUrl) { try { + if (databaseUrl == null || "".equals(databaseUrl.trim())) { + throw new IllegalArgumentException("The databaseUrl cannot be empty."); + } // If the environment variables exist, it means the migration is run in production. // Connect to the official job database. 
- final Database jobsDatabase = new JobsDatabaseInstance( - configs.getDatabaseUser(), - configs.getDatabasePassword(), - configs.getDatabaseUrl()) - .getInitialized(); - LOGGER.info("[{}] Connected to jobs database: {}", MIGRATION_NAME, configs.getDatabaseUrl()); + final Database jobsDatabase = new JobsDatabaseInstance(databaseUser, databasePassword, databaseUrl).getInitialized(); + LOGGER.info("[{}] Connected to jobs database: {}", MIGRATION_NAME, databaseUrl); return Optional.of(jobsDatabase); } catch (final IllegalArgumentException e) { // If the environment variables do not exist, it means the migration is run in development. diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java index cf06d70a979b6..6b6ec98c34ff5 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java @@ -116,8 +116,17 @@ protected void putBoolean(final ObjectNode node, final String columnName, final node.put(columnName, resultSet.getBoolean(index)); } + /** + * In some sources Short might have value larger than {@link Short#MAX_VALUE}. E.q. MySQL has + * unsigned smallint type, which can contain value 65535. If we fail to cast Short value, we will + * try to cast Integer. + */ protected void putShortInt(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException { - node.put(columnName, resultSet.getShort(index)); + try { + node.put(columnName, resultSet.getShort(index)); + } catch (final SQLException e) { + node.put(columnName, DataTypeUtils.returnNullIfInvalid(() -> resultSet.getInt(index))); + } } /** diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java index 6f8f46862fd79..a7d681857aae6 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java @@ -4,14 +4,22 @@ package io.airbyte.db.jdbc; +import org.jooq.JSONFormat; + public class JdbcUtils { private static final JdbcSourceOperations defaultSourceOperations = new JdbcSourceOperations(); + private static final JSONFormat defaultJSONFormat = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); + public static JdbcSourceOperations getDefaultSourceOperations() { return defaultSourceOperations; } + public static JSONFormat getDefaultJSONFormat() { + return defaultJSONFormat; + } + public static String getFullyQualifiedTableName(final String schemaName, final String tableName) { return schemaName != null ? schemaName + "." 
+ tableName : tableName; } diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java index e7fbd7e371a77..22115a9f45324 100644 --- a/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java @@ -24,7 +24,6 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; import io.airbyte.config.Configs; -import io.airbyte.config.EnvConfigs; import io.airbyte.config.JobOutput; import io.airbyte.config.JobOutput.OutputType; import io.airbyte.config.StandardSyncOutput; @@ -39,6 +38,7 @@ import java.util.Collections; import java.util.Set; import java.util.UUID; +import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; import org.flywaydb.core.api.configuration.Configuration; import org.flywaydb.core.api.migration.Context; @@ -52,6 +52,7 @@ import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.Timeout; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) class V0_30_22_001__Store_last_sync_state_test extends AbstractConfigsDatabaseTest { @@ -86,6 +87,8 @@ class V0_30_22_001__Store_last_sync_state_test extends AbstractConfigsDatabaseTe private static Database jobDatabase; @BeforeAll + @Timeout(value = 2, + unit = TimeUnit.MINUTES) public static void setupJobDatabase() throws Exception { jobDatabase = new JobsDatabaseInstance( container.getUsername(), @@ -97,8 +100,7 @@ public static void setupJobDatabase() throws Exception { @Test @Order(10) public void testGetJobsDatabase() { - // when there is no database environment variable, the return value is empty - assertTrue(V0_30_22_001__Store_last_sync_state.getJobsDatabase(new EnvConfigs()).isEmpty()); + assertTrue(V0_30_22_001__Store_last_sync_state.getJobsDatabase("", "", "").isEmpty()); // when there is database environment variable, return the database final Configs configs = mock(Configs.class); @@ -106,7 +108,8 @@ public void testGetJobsDatabase() { when(configs.getDatabasePassword()).thenReturn(container.getPassword()); when(configs.getDatabaseUrl()).thenReturn(container.getJdbcUrl()); - assertTrue(V0_30_22_001__Store_last_sync_state.getJobsDatabase(configs).isPresent()); + assertTrue(V0_30_22_001__Store_last_sync_state + .getJobsDatabase(configs.getDatabaseUser(), configs.getDatabasePassword(), configs.getDatabaseUrl()).isPresent()); } @Test @@ -180,12 +183,7 @@ public void testMigration() throws Exception { .where(COLUMN_CONFIG_TYPE.eq(ConfigSchema.STANDARD_SYNC_STATE.name())) .execute()); - final Configs configs = mock(Configs.class); - when(configs.getDatabaseUser()).thenReturn(container.getUsername()); - when(configs.getDatabasePassword()).thenReturn(container.getPassword()); - when(configs.getDatabaseUrl()).thenReturn(container.getJdbcUrl()); - - final var migration = new V0_30_22_001__Store_last_sync_state(configs); + final var migration = new V0_30_22_001__Store_last_sync_state(container.getUsername(), container.getPassword(), container.getJdbcUrl()); // this context is a flyway class; only the getConnection method is needed to run the migration final Context context = new Context() { diff --git 
a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java index b5ab459de8869..cb31fc19ad917 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java @@ -128,7 +128,6 @@ protected void startTracked() throws Exception { @Override protected void acceptTracked(final AirbyteMessage message) throws Exception { Preconditions.checkState(hasStarted, "Cannot accept records until consumer has started"); - if (message.getType() == Type.RECORD) { final AirbyteRecordMessage recordMessage = message.getRecord(); final AirbyteStreamNameNamespacePair stream = AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage); diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java new file mode 100644 index 0000000000000..a97694727ed3d --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.debezium.internals; + +import io.airbyte.db.DataTypeUtils; +import io.debezium.spi.converter.RelationalColumn; +import java.sql.Timestamp; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class DebeziumConverterUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(DebeziumConverterUtils.class); + + private DebeziumConverterUtils() { + throw new UnsupportedOperationException(); + } + + public static String convertDate(final Object input) { + /** + * While building this custom converter we were not sure what type debezium could return cause there + * is no mention of it in the documentation. Secondly if you take a look at + * {@link io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter#converterFor(io.debezium.spi.converter.RelationalColumn, io.debezium.spi.converter.CustomConverter.ConverterRegistration)} + * method, even it is handling multiple data types but its not clear under what circumstances which + * data type would be returned. I just went ahead and handled the data types that made sense. 
+ * Secondly, we use LocalDateTime to handle this cause it represents DATETIME datatype in JAVA + */ + if (input instanceof LocalDateTime) { + return DataTypeUtils.toISO8601String((LocalDateTime) input); + } else if (input instanceof LocalDate) { + return DataTypeUtils.toISO8601String((LocalDate) input); + } else if (input instanceof Duration) { + return DataTypeUtils.toISO8601String((Duration) input); + } else if (input instanceof Timestamp) { + return DataTypeUtils.toISO8601String(((Timestamp) input).toLocalDateTime()); + } else if (input instanceof Number) { + return DataTypeUtils.toISO8601String( + new Timestamp(((Number) input).longValue()).toLocalDateTime()); + } else if (input instanceof String) { + try { + return LocalDateTime.parse((String) input).toString(); + } catch (final DateTimeParseException e) { + LOGGER.warn("Cannot convert value '{}' to LocalDateTime type", input); + return input.toString(); + } + } + LOGGER.warn("Uncovered date class type '{}'. Use default converter", input.getClass().getName()); + return input.toString(); + } + + public static Object convertDefaultValue(RelationalColumn field) { + if (field.isOptional()) { + return null; + } else if (field.hasDefaultValue()) { + return field.defaultValue(); + } + return null; + } + +} diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java new file mode 100644 index 0000000000000..e162262ba9fb5 --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.debezium.internals; + +import io.debezium.spi.converter.CustomConverter; +import io.debezium.spi.converter.RelationalColumn; +import java.math.BigDecimal; +import java.util.Objects; +import java.util.Properties; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MSSQLConverter implements CustomConverter { + + private final Logger LOGGER = LoggerFactory.getLogger(MSSQLConverter.class);; + + private final String SMALLDATETIME_TYPE = "SMALLDATETIME"; + private final String SMALLMONEY_TYPE = "SMALLMONEY"; + + @Override + public void configure(Properties props) {} + + @Override + public void converterFor(final RelationalColumn field, + final ConverterRegistration registration) { + if (SMALLDATETIME_TYPE.equalsIgnoreCase(field.typeName())) { + registerDate(field, registration); + } else if (SMALLMONEY_TYPE.equalsIgnoreCase(field.typeName())) { + registerMoney(field, registration); + } + + } + + private void registerDate(final RelationalColumn field, + final ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), input -> { + if (Objects.isNull(input)) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + return DebeziumConverterUtils.convertDate(input); + }); + } + + private void registerMoney(final RelationalColumn field, + final ConverterRegistration registration) { + registration.register(SchemaBuilder.float64(), input -> { + if (Objects.isNull(input)) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + if (input instanceof BigDecimal) { + return ((BigDecimal) input).doubleValue(); + } + + LOGGER.warn("Uncovered money class type '{}'. 
Use default converter", + input.getClass().getName()); + return input.toString(); + }); + } + +} diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java index 0b80d39c9c0ab..24af44c5af1ab 100644 --- a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java @@ -4,14 +4,8 @@ package io.airbyte.integrations.debezium.internals; -import io.airbyte.db.DataTypeUtils; import io.debezium.spi.converter.CustomConverter; import io.debezium.spi.converter.RelationalColumn; -import java.sql.Timestamp; -import java.time.Duration; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.format.DateTimeParseException; import java.util.Arrays; import java.util.Properties; import org.apache.kafka.connect.data.SchemaBuilder; @@ -61,50 +55,15 @@ private void registerText(final RelationalColumn field, final ConverterRegistrat if (x instanceof byte[]) { return new String((byte[]) x); - } else + } else { return x.toString(); + } }); } private void registerDate(final RelationalColumn field, final ConverterRegistration registration) { - registration.register(SchemaBuilder.string(), x -> { - if (x == null) { - if (field.isOptional()) { - return null; - } else if (field.hasDefaultValue()) { - return field.defaultValue(); - } - return null; - } - /** - * While building this custom converter we were not sure what type debezium could return cause there - * is no mention of it in the documentation. Secondly if you take a look at - * {@link io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter#converterFor(RelationalColumn, ConverterRegistration)} - * method, even it is handling multiple data types but its not clear under what circumstances which - * data type would be returned. I just went ahead and handled the data types that made sense. - * Secondly, we use LocalDateTime to handle this cause it represents DATETIME datatype in JAVA - */ - if (x instanceof LocalDateTime) { - return DataTypeUtils.toISO8601String((LocalDateTime) x); - } else if (x instanceof LocalDate) { - return DataTypeUtils.toISO8601String((LocalDate) x); - } else if (x instanceof Duration) { - return DataTypeUtils.toISO8601String((Duration) x); - } else if (x instanceof Timestamp) { - return DataTypeUtils.toISO8601String(((Timestamp) x).toLocalDateTime()); - } else if (x instanceof Number) { - return DataTypeUtils.toISO8601String(new Timestamp(((Number) x).longValue()).toLocalDateTime()); - } else if (x instanceof String) { - try { - return LocalDateTime.parse((String) x).toString(); - } catch (final DateTimeParseException e) { - LOGGER.warn("Cannot convert value '{}' to LocalDateTime type", x); - return x.toString(); - } - } - LOGGER.warn("Uncovered date class type '{}'. Use default converter", x.getClass().getName()); - return x.toString(); - }); + registration.register(SchemaBuilder.string(), + x -> x == null ? 
DebeziumConverterUtils.convertDefaultValue(field) : DebeziumConverterUtils.convertDate(x)); } } diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java new file mode 100644 index 0000000000000..dc45ee017e47b --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.debezium.internals; + +import io.debezium.spi.converter.CustomConverter; +import io.debezium.spi.converter.RelationalColumn; +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Properties; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.postgresql.util.PGInterval; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PostgresConverter implements CustomConverter { + + private static final Logger LOGGER = LoggerFactory.getLogger(PostgresConverter.class); + + private final String[] DATE_TYPES = {"DATE", "DATETIME", "TIME", "TIMETZ", "INTERVAL", "TIMESTAMP"}; + private final String[] BIT_TYPES = {"BIT", "VARBIT"}; + private final String[] MONEY_ITEM_TYPE = {"MONEY"}; + private final String[] GEOMETRICS_TYPES = {"BOX", "CIRCLE", "LINE", "LSEG", "POINT", "POLYGON", "PATH"}; + private final String[] TEXT_TYPES = {"VARCHAR", "VARBINARY", "BLOB", "TEXT", "LONGTEXT", "TINYTEXT", "MEDIUMTEXT", "INVENTORY_ITEM", "TSVECTOR"}; + + @Override + public void configure(Properties props) {} + + @Override + public void converterFor(RelationalColumn field, ConverterRegistration registration) { + if (Arrays.stream(DATE_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) { + registerDate(field, registration); + } else if (Arrays.stream(TEXT_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName())) + || Arrays.stream(GEOMETRICS_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName())) + || Arrays.stream(BIT_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) { + registerText(field, registration); + } else if (Arrays.stream(MONEY_ITEM_TYPE).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) { + registerMoney(field, registration); + } + } + + private void registerText(RelationalColumn field, ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), x -> { + if (x == null) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + if (x instanceof byte[]) { + return new String((byte[]) x); + } else { + return x.toString(); + } + }); + } + + private void registerDate(RelationalColumn field, ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), x -> { + if (x == null) { + return DebeziumConverterUtils.convertDefaultValue(field); + } else if (x instanceof PGInterval) { + return convertInterval((PGInterval) x); + } else { + return DebeziumConverterUtils.convertDate(x); + } + }); + } + + private String convertInterval(PGInterval pgInterval) { + StringBuilder resultInterval = new StringBuilder(); + formatDateUnit(resultInterval, pgInterval.getYears(), " year "); + formatDateUnit(resultInterval, pgInterval.getMonths(), " mons "); + formatDateUnit(resultInterval, pgInterval.getDays(), " days "); + + formatTimeValues(resultInterval, pgInterval); + return resultInterval.toString(); + } + + private void registerMoney(RelationalColumn 
field, ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), x -> { + if (x == null) { + return DebeziumConverterUtils.convertDefaultValue(field); + } else if (x instanceof Double) { + BigDecimal result = BigDecimal.valueOf((Double) x); + if (result.compareTo(new BigDecimal("999999999999999")) == 1 + || result.compareTo(new BigDecimal("-999999999999999")) == -1) { + return null; + } + return result.toString(); + } else { + return x.toString(); + } + }); + } + + private void formatDateUnit(StringBuilder resultInterval, int dateUnit, String s) { + if (dateUnit != 0) { + resultInterval + .append(dateUnit) + .append(s); + } + } + + private void formatTimeValues(StringBuilder resultInterval, PGInterval pgInterval) { + if (isNegativeTime(pgInterval)) { + resultInterval.append("-"); + } + // TODO check if value more or less than Integer.MIN_VALUE Integer.MAX_VALUE, + int hours = Math.abs(pgInterval.getHours()); + int minutes = Math.abs(pgInterval.getMinutes()); + int seconds = Math.abs(pgInterval.getWholeSeconds()); + resultInterval.append(addFirstDigit(hours)); + resultInterval.append(hours); + resultInterval.append(":"); + resultInterval.append(addFirstDigit(minutes)); + resultInterval.append(minutes); + resultInterval.append(":"); + resultInterval.append(addFirstDigit(seconds)); + resultInterval.append(seconds); + } + + private String addFirstDigit(int hours) { + return hours <= 9 ? "0" : ""; + } + + private boolean isNegativeTime(PGInterval pgInterval) { + return pgInterval.getHours() < 0 + || pgInterval.getMinutes() < 0 + || pgInterval.getWholeSeconds() < 0; + } + +} diff --git a/airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java b/airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java new file mode 100644 index 0000000000000..a82990a2c6bb6 --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.debezium.internals; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.debezium.spi.converter.RelationalColumn; +import java.sql.Timestamp; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +class DebeziumConverterUtilsTest { + + @Test + public void convertDefaultValueTest() { + + RelationalColumn relationalColumn = mock(RelationalColumn.class); + + when(relationalColumn.isOptional()).thenReturn(true); + Object actualColumnDefaultValue = DebeziumConverterUtils.convertDefaultValue(relationalColumn); + Assertions.assertNull(actualColumnDefaultValue, "Default value for optional relational column should be null"); + + when(relationalColumn.isOptional()).thenReturn(false); + when(relationalColumn.hasDefaultValue()).thenReturn(false); + actualColumnDefaultValue = DebeziumConverterUtils.convertDefaultValue(relationalColumn); + Assertions.assertNull(actualColumnDefaultValue); + + when(relationalColumn.isOptional()).thenReturn(false); + when(relationalColumn.hasDefaultValue()).thenReturn(true); + String expectedColumnDefaultValue = "default value"; + when(relationalColumn.defaultValue()).thenReturn(expectedColumnDefaultValue); + actualColumnDefaultValue = DebeziumConverterUtils.convertDefaultValue(relationalColumn); + Assertions.assertEquals(actualColumnDefaultValue, expectedColumnDefaultValue); + } + + @Test + public void convertLocalDate() { + LocalDate localDate = LocalDate.of(2021, 1, 1); + + String actual = DebeziumConverterUtils.convertDate(localDate); + Assertions.assertEquals("2021-01-01T00:00:00Z", actual); + } + + @Test + public void convertTLocalTime() { + LocalTime localTime = LocalTime.of(8, 1, 1); + String actual = DebeziumConverterUtils.convertDate(localTime); + Assertions.assertEquals("08:01:01", actual); + } + + @Test + public void convertLocalDateTime() { + LocalDateTime localDateTime = LocalDateTime.of(2021, 1, 1, 8, 1, 1); + + String actual = DebeziumConverterUtils.convertDate(localDateTime); + Assertions.assertEquals("2021-01-01T08:01:01Z", actual); + } + + @Test + @Disabled + public void convertDuration() { + Duration duration = Duration.ofHours(100_000); + + String actual = DebeziumConverterUtils.convertDate(duration); + Assertions.assertEquals("1981-05-29T20:00:00Z", actual); + } + + @Test + public void convertTimestamp() { + LocalDateTime localDateTime = LocalDateTime.of(2021, 1, 1, 8, 1, 1); + Timestamp timestamp = Timestamp.valueOf(localDateTime); + + String actual = DebeziumConverterUtils.convertDate(timestamp); + Assertions.assertEquals("2021-01-01T08:01:01Z", actual); + } + + @Test + @Disabled + public void convertNumber() { + Number number = 100_000; + + String actual = DebeziumConverterUtils.convertDate(number); + Assertions.assertEquals("1970-01-01T03:01:40Z", actual); + } + + @Test + public void convertStringDateFormat() { + String stringValue = "2021-01-01T00:00:00Z"; + + String actual = DebeziumConverterUtils.convertDate(stringValue); + Assertions.assertEquals("2021-01-01T00:00:00Z", actual); + } + +} diff --git a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md index 6d6729b3f611a..c47226fda6e9b 100644 --- a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md +++ 
b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.28 +Print stream name when incremental sync tests fail + ## 0.1.27 Add ignored fields for full refresh test (unit tests) diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index f13ecdebf4ed0..22f70e298ff9a 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -9,7 +9,7 @@ COPY setup.py ./ COPY pytest.ini ./ RUN pip install . -LABEL io.airbyte.version=0.1.27 +LABEL io.airbyte.version=0.1.28 LABEL io.airbyte.name=airbyte/source-acceptance-test ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin"] diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py index 1eaae862cf8f6..1d78264ef0261 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py @@ -45,5 +45,10 @@ def test_sequential_reads( if output_diff: msg = f"{stream}: the two sequential reads should produce either equal set of records or one of them is a strict subset of the other" detailed_logger.info(msg) + detailed_logger.info("First read") + detailed_logger.log_json_list(stream_records_1) + detailed_logger.info("Second read") + detailed_logger.log_json_list(stream_records_2) + detailed_logger.info("Difference") detailed_logger.log_json_list(output_diff) pytest.fail(msg) diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py index e7fdf1cf2ca0b..dc9db26749c95 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py @@ -73,7 +73,7 @@ def records_with_state(records, state, stream_mapping, state_cursor_paths) -> It except KeyError: # try second time as an absolute path in state file (i.e. bookmarks -> stream_name -> column -> value) state_value = cursor_field.parse(record=state, path=state_cursor_paths[stream_name]) - yield record_value, state_value + yield record_value, state_value, stream_name @pytest.mark.default_timeout(20 * 60) @@ -89,18 +89,18 @@ def test_two_sequential_reads(self, connector_config, configured_catalog_for_inc assert records_1, "Should produce at least one record" latest_state = states_1[-1].state.data - for record_value, state_value in records_with_state(records_1, latest_state, stream_mapping, cursor_paths): + for record_value, state_value, stream_name in records_with_state(records_1, latest_state, stream_mapping, cursor_paths): assert ( record_value <= state_value - ), "First incremental sync should produce records younger or equal to cursor value from the state" + ), f"First incremental sync should produce records younger or equal to cursor value from the state. 
Stream: {stream_name}" output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, state=latest_state) records_2 = filter_output(output, type_=Type.RECORD) - for record_value, state_value in records_with_state(records_2, latest_state, stream_mapping, cursor_paths): + for record_value, state_value, stream_name in records_with_state(records_2, latest_state, stream_mapping, cursor_paths): assert ( record_value >= state_value - ), "Second incremental sync should produce records older or equal to cursor value from the state" + ), f"Second incremental sync should produce records older or equal to cursor value from the state. Stream: {stream_name}" def test_state_with_abnormally_large_values(self, connector_config, configured_catalog, future_state, docker_runner: ConnectorRunner): configured_catalog = incremental_only_catalog(configured_catalog) diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py index 3a9a433704d5c..11478abed4852 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py @@ -85,7 +85,7 @@ def test_simple_path(records, stream_mapping, simple_state): paths = {"my_stream": ["id"]} result = records_with_state(records=records, state=simple_state, stream_mapping=stream_mapping, state_cursor_paths=paths) - record_value, state_value = next(result) + record_value, state_value, stream_name = next(result) assert record_value == 1, "record value must be correctly found" assert state_value == 11, "state value must be correctly found" @@ -96,7 +96,7 @@ def test_nested_path(records, stream_mapping, nested_state): paths = {"my_stream": ["some_account_id", "ts_updated"]} result = records_with_state(records=records, state=nested_state, stream_mapping=stream_mapping, state_cursor_paths=paths) - record_value, state_value = next(result) + record_value, state_value, stream_name = next(result) assert record_value == pendulum.datetime(2015, 5, 1), "record value must be correctly found" assert state_value == pendulum.datetime(2015, 1, 1, 22, 3, 11), "state value must be correctly found" @@ -116,7 +116,7 @@ def test_absolute_path(records, stream_mapping, singer_state): paths = {"my_stream": ["bookmarks", "my_stream", "ts_created"]} result = records_with_state(records=records, state=singer_state, stream_mapping=stream_mapping, state_cursor_paths=paths) - record_value, state_value = next(result) + record_value, state_value, stream_name = next(result) assert record_value == pendulum.datetime(2015, 11, 1, 22, 3, 11), "record value must be correctly found" assert state_value == pendulum.datetime(2014, 1, 1, 22, 3, 11), "state value must be correctly found" diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java index 2959aab70089a..8c12d911d7700 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java 
@@ -496,7 +496,6 @@ public void testIncrementalSync() throws Exception { .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); final JsonNode config = getConfig(); runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false); - final List secondSyncMessages = Lists.newArrayList( new AirbyteMessage() .withType(Type.RECORD) diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 70ce39f56dc4e..31f450fc9517b 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -72,6 +72,7 @@ | Salesforce | [![source-salesforce](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-salesforce%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-salesforce) | | Salesloft | [![source-salesloft](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-salesloft%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-salesloft) | | Sendgrid | [![source-sendgrid](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-sendgrid%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-sendgrid) | +| Sentry | [![source-sentry](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-sentry%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-sentry) | | Shopify | [![source-shopify](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-shopify%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-shopify) | | Slack | [![source-slack](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-slack%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-slack) | | Smartsheets | [![source-smartsheets](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-smartsheets%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-smartsheets) | @@ -108,6 +109,8 @@ | Local JSON | [![destination-local-json](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-local-json%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-local-json) | | Mongo DB | [![destination-mongodb](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-mongodb%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-mongodb) | | Postgres | [![destination-postgres](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-postgres%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-postgres) | +| Pulsar | [![destination-pulsar](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-pulsar%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-pulsar) | | Redshift | [![destination-redshift](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-redshift%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-redshift) | | S3 | 
[![destination-s3](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-s3%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-s3) | | Snowflake | [![destination-snowflake](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-snowflake%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-snowflake) | +| Cassandra | [![destination-cassandra](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-cassandra%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-cassandra) | diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md b/airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md new file mode 100644 index 0000000000000..edb26b327d2a6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md @@ -0,0 +1,5 @@ +# BigQuery Denormalized Destination Connector Bootstrap + +Instead of splitting the final data into multiple tables, this destination leverages BigQuery capabilities with [Structured and Repeated fields](https://cloud.google.com/bigquery/docs/nested-repeated) to produce a single "big" table per stream. This does not write the `_airbyte_raw_*` tables in the destination and normalization from this connector is not supported at this time. + +See [this](https://docs.airbyte.io/integrations/destinations/databricks) link for the nuances about the connector. \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index 2ad0b213627c2..ec6426734c09c 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.7 +LABEL io.airbyte.version=0.1.8 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java index d52da1ffe77bb..19c03205692a0 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java @@ -59,6 +59,7 @@ protected JsonNode formatRecord(final Schema schema, final AirbyteRecordMessage final ObjectNode data = (ObjectNode) formatData(schema.getFields(), recordMessage.getData()); data.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString()); data.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, formattedEmittedAt); + return data; } @@ -67,6 +68,10 @@ protected JsonNode formatData(final FieldList fields, final JsonNode root) { if (fields == null) { return root; } + List dateTimeFields = 
BigQueryUtils.getDateTimeFieldsFromSchema(fields); + if (!dateTimeFields.isEmpty()) { + BigQueryUtils.transformJsonDateTimeToBigDataFormat(dateTimeFields, (ObjectNode) root); + } if (root.isObject()) { final List fieldNames = fields.stream().map(Field::getName).collect(Collectors.toList()); return Jsons.jsonNode(Jsons.keys(root).stream() diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java index aa9026098504a..4ffcd7ccc5b76 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java @@ -4,6 +4,7 @@ package io.airbyte.integrations.destination.bigquery; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.params.provider.Arguments.arguments; @@ -42,6 +43,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.joda.time.DateTime; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -73,6 +75,10 @@ class BigQueryDenormalizedDestinationTest { .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) .withData(getDataWithFormats()) .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_USERS4 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) + .withData(getDataWithJSONDateTimeFormats()) + .withEmittedAt(NOW.toEpochMilli())); private JsonNode config; @@ -109,6 +115,7 @@ void setup(final TestInfo info) throws IOException { MESSAGE_USERS1.getRecord().setNamespace(datasetId); MESSAGE_USERS2.getRecord().setNamespace(datasetId); MESSAGE_USERS3.getRecord().setNamespace(datasetId); + MESSAGE_USERS4.getRecord().setNamespace(datasetId); final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).setLocation(datasetLocation).build(); dataset = bigquery.create(datasetInfo); @@ -199,7 +206,7 @@ void testWriteWithFormat() throws Exception { // Bigquery's datetime type accepts multiple input format but always outputs the same, so we can't // expect to receive the value we sent. 
- assertEquals(extractJsonValues(resultJson, "updated_at"), Set.of("2018-08-19T12:11:35.220")); + assertEquals(extractJsonValues(resultJson, "updated_at"), Set.of("2021-10-11T06:36:53")); final Schema expectedSchema = Schema.of( Field.of("name", StandardSQLTypeName.STRING), @@ -211,6 +218,30 @@ void testWriteWithFormat() throws Exception { assertEquals(BigQueryUtils.getTableDefinition(bigquery, dataset.getDatasetId().getDataset(), USERS_STREAM_NAME).getSchema(), expectedSchema); } + @Test + void testIfJSONDateTimeWasConvertedToBigQueryFormat() throws Exception { + catalog = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(USERS_STREAM_NAME).withNamespace(datasetId).withJsonSchema(getSchemaWithDateTime())) + .withSyncMode(SyncMode.FULL_REFRESH).withDestinationSyncMode(DestinationSyncMode.OVERWRITE))); + + final BigQueryDestination destination = new BigQueryDenormalizedDestination(); + final AirbyteMessageConsumer consumer = destination.getConsumer(config, catalog, Destination::defaultOutputRecordCollector); + + consumer.accept(MESSAGE_USERS4); + consumer.close(); + + final List usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME); + assertEquals(usersActual.size(), 1); + final JsonNode resultJson = usersActual.get(0); + + // BigQuery Accepts "YYYY-MM-DD HH:MM:SS[.SSSSSS]" format + // returns "yyyy-MM-dd'T'HH:mm:ss" format + assertEquals(Set.of(new DateTime("2021-10-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), extractJsonValues(resultJson, "updated_at")); + // check nested datetime + assertEquals(Set.of(new DateTime("2021-11-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), + extractJsonValues(resultJson.get("items"), "nested_datetime")); + } + private Set extractJsonValues(final JsonNode node, final String attributeName) { final List valuesNode = node.findValues(attributeName); final Set resultSet = new HashSet<>(); @@ -233,7 +264,6 @@ private List retrieveRecordsAsJson(final String tableName) throws Exce .newBuilder( String.format("select TO_JSON_STRING(t) as jsonValue from %s.%s t;", dataset.getDatasetId().getDataset(), tableName.toLowerCase())) .setUseLegacySql(false).build(); - BigQueryUtils.executeQuery(bigquery, queryConfig); return StreamSupport @@ -250,170 +280,4 @@ private static Stream schemaAndDataProvider() { arguments(getSchema(), MESSAGE_USERS2)); } - private static JsonNode getSchema() { - return Jsons.deserialize( - "{\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"permissions\": {\n" - + " \"type\": [\n" - + " \"array\"\n" - + " ],\n" - + " \"items\": {\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"domain\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"grants\": {\n" - + " \"type\": [\n" - + " \"array\"\n" - + " ],\n" - + " \"items\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + "}"); - - } - - private static JsonNode getSchemaWithFormats() { - return Jsons.deserialize( - "{\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"date_of_birth\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ],\n" - + " \"format\": \"date\"\n" - + " },\n" - + " \"updated_at\": {\n" 
- + " \"type\": [\n" - + " \"string\"\n" - + " ],\n" - + " \"format\": \"date-time\"\n" - + " }\n" - + " }\n" - + "}"); - } - - private static JsonNode getSchemaWithInvalidArrayType() { - return Jsons.deserialize( - "{\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"permissions\": {\n" - + " \"type\": [\n" - + " \"array\"\n" - + " ],\n" - + " \"items\": {\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"domain\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"grants\": {\n" - + " \"type\": [\n" - + " \"array\"\n" // missed "items" element - + " ]\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + "}"); - - } - - private static JsonNode getData() { - return Jsons.deserialize( - "{\n" - + " \"name\": \"Andrii\",\n" - + " \"permissions\": [\n" - + " {\n" - + " \"domain\": \"abs\",\n" - + " \"grants\": [\n" - + " \"admin\"\n" - + " ]\n" - + " },\n" - + " {\n" - + " \"domain\": \"tools\",\n" - + " \"grants\": [\n" - + " \"read\", \"write\"\n" - + " ]\n" - + " }\n" - + " ]\n" - + "}"); - } - - private static JsonNode getDataWithFormats() { - return Jsons.deserialize( - "{\n" - + " \"name\": \"Andrii\",\n" - + " \"date_of_birth\": \"1996-01-25\",\n" - + " \"updated_at\": \"2018-08-19 12:11:35.22\"\n" - + "}"); - } - - private static JsonNode getDataWithEmptyObjectAndArray() { - return Jsons.deserialize( - "{\n" - + " \"name\": \"Andrii\",\n" - + " \"permissions\": [\n" - + " {\n" - + " \"domain\": \"abs\",\n" - + " \"items\": {},\n" // empty object - + " \"grants\": [\n" - + " \"admin\"\n" - + " ]\n" - + " },\n" - + " {\n" - + " \"domain\": \"tools\",\n" - + " \"grants\": [],\n" // empty array - + " \"items\": {\n" // object with empty array and object - + " \"object\": {},\n" - + " \"array\": []\n" - + " }\n" - + " }\n" - + " ]\n" - + "}"); - - } - } diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java new file mode 100644 index 0000000000000..2c1fbea15e8e9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.bigquery.util; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; + +public class BigQueryDenormalizedTestDataUtils { + + public static JsonNode getSchema() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"accepts_marketing_updated_at\": {\n" + + " \"type\": [\n" + + " \"null\",\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " },\n" + + " \"name\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"permissions\": {\n" + + " \"type\": [\n" + + " \"array\"\n" + + " ],\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"domain\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"grants\": {\n" + + " \"type\": [\n" + + " \"array\"\n" + + " ],\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}"); + + } + + public static JsonNode getSchemaWithFormats() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"date_of_birth\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date\"\n" + + " },\n" + + " \"updated_at\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " }\n" + + " }\n" + + "}"); + } + + public static JsonNode getSchemaWithDateTime() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " " + + + "\"updated_at\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " },\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"nested_datetime\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " }\n" + + + " " + + "}\n" + + " }\n" + + " }\n" + + "}"); + } + + public static JsonNode getSchemaWithInvalidArrayType() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"permissions\": {\n" + + " \"type\": [\n" + + " \"array\"\n" + + " ],\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"domain\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"grants\": {\n" + + " \"type\": [\n" + + " \"array\"\n" // missed "items" element + + " ]\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}"); + + } + + public static JsonNode getData() { + return Jsons.deserialize( + "{\n" + + " \"name\": \"Andrii\",\n" + + " \"accepts_marketing_updated_at\": \"2021-10-11T06:36:53-07:00\",\n" + + " \"permissions\": [\n" + + " {\n" + + " \"domain\": \"abs\",\n" + + " \"grants\": [\n" + + " \"admin\"\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"domain\": \"tools\",\n" + + " \"grants\": [\n" + + " \"read\", \"write\"\n" + + " ]\n" + + " }\n" + + " ]\n" + + "}"); + } + + public static JsonNode getDataWithFormats() { + return Jsons.deserialize( + "{\n" + + " \"name\": \"Andrii\",\n" + + " \"date_of_birth\": 
\"1996-01-25\",\n" + + " \"updated_at\": \"2021-10-11T06:36:53\"\n" + + "}"); + } + + public static JsonNode getDataWithJSONDateTimeFormats() { + return Jsons.deserialize( + "{\n" + + " \"updated_at\": \"2021-10-11T06:36:53+00:00\",\n" + + " \"items\": {\n" + + " \"nested_datetime\": \"2021-11-11T06:36:53+00:00\"\n" + + " }\n" + + "}"); + } + + public static JsonNode getDataWithEmptyObjectAndArray() { + return Jsons.deserialize( + "{\n" + + " \"name\": \"Andrii\",\n" + + " \"permissions\": [\n" + + " {\n" + + " \"domain\": \"abs\",\n" + + " \"items\": {},\n" // empty object + + " \"grants\": [\n" + + " \"admin\"\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"domain\": \"tools\",\n" + + " \"grants\": [],\n" // empty array + + " \"items\": {\n" // object with empty array and object + + " \"object\": {},\n" + + " \"array\": []\n" + + " }\n" + + " }\n" + + " ]\n" + + "}"); + } + +} diff --git a/airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md b/airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md new file mode 100644 index 0000000000000..9a5d31b122345 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md @@ -0,0 +1,8 @@ +# BigQuery Destination Connector Bootstrap + +BigQuery is a serverless, highly scalable, and cost-effective data warehouse +offered by Google Cloud Provider. + +BigQuery connector is producing the standard Airbyte outputs using a `_airbyte_raw_*` tables storing the JSON blob data first. Afterward, these are transformed and normalized into separate tables, potentially "exploding" nested streams into their own tables if [basic normalization](https://docs.airbyte.io/understanding-airbyte/basic-normalization) is configured. + +See [this](https://docs.airbyte.io/integrations/destinations/bigquery) link for more information about the connector. 
diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index d5fc8a397cb07..96fd7d4d6c2a3 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -5,16 +5,21 @@ package io.airbyte.integrations.destination.bigquery; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; import com.google.cloud.bigquery.Clustering; import com.google.cloud.bigquery.Dataset; import com.google.cloud.bigquery.DatasetInfo; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.FieldList; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobId; import com.google.cloud.bigquery.JobInfo; import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; import com.google.cloud.bigquery.StandardTableDefinition; import com.google.cloud.bigquery.TableDefinition; import com.google.cloud.bigquery.TableId; @@ -24,15 +29,19 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.JavaBaseConstants; +import java.util.ArrayList; +import java.util.List; import java.util.Set; import java.util.UUID; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class BigQueryUtils { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUtils.class); + private static final String BIG_QUERY_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSSSS"; static ImmutablePair executeQuery(final BigQuery bigquery, final QueryJobConfiguration queryConfig) { final JobId jobId = JobId.of(UUID.randomUUID().toString()); @@ -143,4 +152,42 @@ static TableDefinition getTableDefinition(final BigQuery bigquery, final String return bigquery.getTable(tableId).getDefinition(); } + /** + * @param fieldList - the list to be checked + * @return The list of fields with datetime format. + * + */ + public static List getDateTimeFieldsFromSchema(FieldList fieldList) { + List dateTimeFields = new ArrayList<>(); + for (Field field : fieldList) { + if (field.getType().getStandardType().equals(StandardSQLTypeName.DATETIME)) { + dateTimeFields.add(field.getName()); + } + } + return dateTimeFields; + } + + /** + * @param dateTimeFields - list contains fields of DATETIME format + * @param data - Json will be sent to Google BigData service + * + * The special DATETIME format is required to save this type to BigQuery. 
+ * @see Supported + * Google bigquery datatype This method is responsible to adapt JSON DATETIME to Bigquery + */ + public static void transformJsonDateTimeToBigDataFormat(List dateTimeFields, ObjectNode data) { + dateTimeFields.forEach(e -> { + if (data.findValue(e) != null && !data.get(e).isNull()) { + String googleBigQueryDateFormat = QueryParameterValue + .dateTime(new DateTime(data + .findValue(e) + .asText()) + .toString(BIG_QUERY_DATETIME_FORMAT)) + .getValue(); + data.put(e, googleBigQueryDateFormat); + } + }); + } + } diff --git a/airbyte-integrations/connectors/destination-cassandra/.dockerignore b/airbyte-integrations/connectors/destination-cassandra/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-cassandra/Dockerfile b/airbyte-integrations/connectors/destination-cassandra/Dockerfile new file mode 100644 index 0000000000000..197bb25ec9fc4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/Dockerfile @@ -0,0 +1,11 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte +ENV APPLICATION destination-cassandra + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-cassandra diff --git a/airbyte-integrations/connectors/destination-cassandra/README.md b/airbyte-integrations/connectors/destination-cassandra/README.md new file mode 100644 index 0000000000000..5e5237291eabe --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/README.md @@ -0,0 +1,68 @@ +# Destination Cassandra + +This is the repository for the Cassandra destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/cassandra). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. 
+ +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-cassandra:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-cassandra:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/cassandra`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/cassandraDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/destination-cassandra/bootstrap.md b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md new file mode 100644 index 0000000000000..35c19425c395f --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md @@ -0,0 +1,30 @@ +# Cassandra Destination + +Cassandra is a free and open-source, distributed, wide-column store, NoSQL database management system designed to handle +large amounts of data across many commodity servers, providing high availability with no single point of failure + +The data is structured in keyspaces and tables and is partitioned and replicated across different nodes in the +cluster. +[Read more about Cassandra](https://cassandra.apache.org/_/index.html) + +This connector maps an incoming `stream` to a Cassandra `table` and a `namespace` to a Cassandra`keyspace`. +When using destination sync mode `append` and `append_dedup`, an `insert` operation is performed against an existing +Cassandra table. +When using `overwrite`, the records are first placed in a temp table. When all the messages have been received the data +is copied to the final table which is first truncated and the temp table is deleted. + +The Implementation uses the [Datastax](https://github.com/datastax/java-driver) driver in order to access +Cassandra. 
[CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java) +handles the communication with the Cassandra cluster and internally it uses +the [SessionManager](./src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java) to retrieve a +CqlSession to the cluster. + +The [CassandraMessageConsumer](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java) +class contains the logic for handling airbyte messages, events and copying data between tables. + +## Development + +See the [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java) +class on how to use the datastax driver. + +[Datastax docs.](https://docs.datastax.com/en/developer/java-driver/3.0/) \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cassandra/build.gradle b/airbyte-integrations/connectors/destination-cassandra/build.gradle new file mode 100644 index 0000000000000..e3c4e6ce133c7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/build.gradle @@ -0,0 +1,33 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.cassandra.CassandraDestination' +} + +def cassandraDriver = '4.13.0' +def testContainersVersion = '1.16.0' +def assertVersion = '3.21.0' + +dependencies { + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:bases:base-java') + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + implementation "com.datastax.oss:java-driver-core:${cassandraDriver}" + implementation "com.datastax.oss:java-driver-query-builder:${cassandraDriver}" + implementation "com.datastax.oss:java-driver-mapper-runtime:${cassandraDriver}" + + + // https://mvnrepository.com/artifact/org.assertj/assertj-core + testImplementation "org.assertj:assertj-core:${assertVersion}" + testImplementation "org.testcontainers:cassandra:${testContainersVersion}" + + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-cassandra') +} diff --git a/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml new file mode 100644 index 0000000000000..75090b3b59cae --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml @@ -0,0 +1,24 @@ +version: '3.7' + +services: + cassandra1: + image: cassandra:4.0 + ports: + - "9042:9042" + environment: + - "MAX_HEAP_SIZE=2048M" + - "HEAP_NEWSIZE=1024M" + - "CASSANDRA_CLUSTER_NAME=cassandra_cluster" + +# Uncomment if you want to run a Cassandra cluster +# cassandra2: +# image: cassandra:4.0 +# ports: +# - "9043:9042" +# environment: +# - "MAX_HEAP_SIZE=2048M" +# - "HEAP_NEWSIZE=1024M" +# - "CASSANDRA_SEEDS=cassandra1" +# - "CASSANDRA_CLUSTER_NAME=cassandra_cluster" +# depends_on: +# - cassandra1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json new file mode 100644 index 0000000000000..644fd54c1ab92 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json @@ -0,0 +1,4 @@ +{ + "username": "paste-username-here", + "password": "paste-password-here" +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java new file mode 100644 index 0000000000000..62a80f3b78036 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Objects; + +/* + * Immutable configuration class for storing cassandra related config. + */ +class CassandraConfig { + + private final String keyspace; + + private final String username; + + private final String password; + + private final String address; + + private final int port; + + private final String datacenter; + + private final int replication; + + public CassandraConfig(String keyspace, + String username, + String password, + String address, + int port, + String datacenter, + int replication) { + this.keyspace = keyspace; + this.username = username; + this.password = password; + this.address = address; + this.port = port; + this.datacenter = datacenter; + this.replication = replication; + } + + public CassandraConfig(JsonNode config) { + this.keyspace = config.get("keyspace").asText(); + this.username = config.get("username").asText(); + this.password = config.get("password").asText(); + this.address = config.get("address").asText(); + this.port = config.get("port").asInt(9042); + this.datacenter = config.get("datacenter").asText("datacenter1"); + this.replication = config.get("replication").asInt(1); + } + + public String getKeyspace() { + return keyspace; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + public String getAddress() { + return address; + } + + public int getPort() { + return port; + } + + public String getDatacenter() { + return datacenter; + } + + public int getReplication() { + return replication; + } + + @Override + public String toString() { + return "CassandraConfig{" + + "keyspace='" + keyspace + '\'' + + ", username='" + username + '\'' + + ", password='" + password + '\'' + + ", address='" + address + '\'' + + ", port=" + port + + ", datacenter='" + datacenter + '\'' + + ", replication=" + replication + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CassandraConfig that = (CassandraConfig) o; + return port == that.port && username.equals(that.username) && password.equals(that.password) && + address.equals(that.address) && datacenter.equals(that.datacenter); + } + + @Override + public int hashCode() { + return Objects.hash(username, password, address, port, datacenter); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java new file mode 100644 index 
0000000000000..19ed9d1267c93 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.now; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.BatchStatement; +import com.datastax.oss.driver.api.core.cql.BatchType; +import com.datastax.oss.driver.api.core.cql.BoundStatement; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.metadata.TokenMap; +import com.datastax.oss.driver.api.core.type.DataTypes; +import com.datastax.oss.driver.api.core.uuid.Uuids; +import com.datastax.oss.driver.api.querybuilder.QueryBuilder; +import com.datastax.oss.driver.api.querybuilder.SchemaBuilder; +import io.airbyte.integrations.base.JavaBaseConstants; +import java.io.Closeable; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraCqlProvider implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraCqlProvider.class); + + private static final int N_THREADS = Runtime.getRuntime().availableProcessors(); + + private final ExecutorService executorService; + + private final CqlSession cqlSession; + + private final CassandraConfig cassandraConfig; + + private final String columnId; + + private final String columnData; + + private final String columnTimestamp; + + public CassandraCqlProvider(CassandraConfig cassandraConfig) { + this.cassandraConfig = cassandraConfig; + this.cqlSession = SessionManager.initSession(cassandraConfig); + var nameTransformer = new CassandraNameTransformer(cassandraConfig); + this.columnId = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_AB_ID); + this.columnData = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_DATA); + this.columnTimestamp = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + this.executorService = Executors.newFixedThreadPool(N_THREADS); + } + + public void createKeySpaceIfNotExists(String keyspace, int replicationFactor) { + var query = SchemaBuilder.createKeyspace(keyspace) + .ifNotExists() + .withSimpleStrategy(replicationFactor) + .build(); + cqlSession.execute(query); + } + + public void createTableIfNotExists(String keyspace, String tableName) { + var query = SchemaBuilder.createTable(keyspace, tableName) + .ifNotExists() + .withPartitionKey(columnId, DataTypes.UUID) + .withColumn(columnData, DataTypes.TEXT) + .withColumn(columnTimestamp, DataTypes.TIMESTAMP) + .build(); + cqlSession.execute(query); + } + + public void dropTableIfExists(String keyspace, String tableName) { + var query = SchemaBuilder.dropTable(keyspace, tableName) + .ifExists() + .build(); + cqlSession.execute(query); + } + + public void insert(String keyspace, String tableName, String jsonData) { + var query = QueryBuilder.insertInto(keyspace, tableName) + .value(columnId, QueryBuilder.literal(Uuids.random())) + .value(columnData, QueryBuilder.literal(jsonData)) + .value(columnTimestamp, 
QueryBuilder.toTimestamp(now())) + .build(); + cqlSession.execute(query); + } + + public void truncate(String keyspace, String tableName) { + var query = QueryBuilder.truncate(keyspace, tableName).build(); + cqlSession.execute(query); + } + + public List select(String keyspace, String tableName) { + var query = QueryBuilder.selectFrom(keyspace, tableName) + .columns(columnId, columnData, columnTimestamp) + .build(); + return cqlSession.execute(query) + .map(result -> new CassandraRecord( + result.get(columnId, UUID.class), + result.get(columnData, String.class), + result.get(columnTimestamp, Instant.class))) + .all(); + } + + public List>> retrieveMetadata() { + return cqlSession.getMetadata().getKeyspaces().values().stream() + .map(keyspace -> Tuple.of(keyspace.getName().toString(), keyspace.getTables().values() + .stream() + .map(table -> table.getName().toString()) + .collect(Collectors.toList()))) + .collect(Collectors.toList()); + } + + public void copy(String keyspace, String sourceTable, String destinationTable) { + var select = String.format("SELECT * FROM %s.%s WHERE token(%s) > ? AND token(%s) <= ?", + keyspace, sourceTable, columnId, columnId); + + var selectStatement = cqlSession.prepare(select); + + var insert = String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (?, ?, ?)", + keyspace, destinationTable, columnId, columnData, columnTimestamp); + + var insertStatement = cqlSession.prepare(insert); + + // perform full table scan in parallel using token ranges + // optimal for copying large amounts of data + cqlSession.getMetadata().getTokenMap() + .map(TokenMap::getTokenRanges) + .orElseThrow(IllegalStateException::new) + .stream() + .flatMap(range -> range.unwrap().stream()) + .map(range -> selectStatement.bind(range.getStart(), range.getEnd())) + // explore datastax 4.x async api as an alternative for async processing + .map(selectBoundStatement -> executorService.submit(() -> batchInsert(selectBoundStatement, insertStatement))) + .forEach(this::awaitThread); + + } + + private void batchInsert(BoundStatement select, PreparedStatement insert) { + // unlogged removes the log record for increased insert speed + var batchStatement = BatchStatement.builder(BatchType.UNLOGGED); + + cqlSession.execute(select).all().stream() + .map(r -> CassandraRecord.of( + r.get(columnId, UUID.class), + r.get(columnData, String.class), + r.get(columnTimestamp, Instant.class))) + .map(r -> insert.bind(r.getId(), r.getData(), r.getTimestamp())) + .forEach(batchStatement::addStatement); + + cqlSession.execute(batchStatement.build()); + } + + private void awaitThread(Future future) { + try { + future.get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOGGER.error("Interrupted thread while copying data with reason: ", e); + } catch (ExecutionException e) { + LOGGER.error("Error while copying data with reason: ", e); + } + } + + @Override + public void close() { + // wait for tasks completion and terminate executor gracefully + executorService.shutdown(); + // close cassandra session for the given config + SessionManager.closeSession(cassandraConfig); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java new file mode 100644 index 0000000000000..90fb821b7477a --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraDestination.class); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new CassandraDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + var cassandraConfig = new CassandraConfig(config); + // add random uuid to avoid conflicts with existing tables. + String tableName = "table_" + UUID.randomUUID().toString().replace("-", ""); + CassandraCqlProvider cassandraCqlProvider = null; + try { + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + // check connection and write permissions + cassandraCqlProvider.createKeySpaceIfNotExists(cassandraConfig.getKeyspace(), + cassandraConfig.getReplication()); + cassandraCqlProvider.createTableIfNotExists(cassandraConfig.getKeyspace(), tableName); + cassandraCqlProvider.insert(cassandraConfig.getKeyspace(), tableName, "{}"); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.error("Can't establish Cassandra connection with reason: ", e); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED); + } finally { + if (cassandraCqlProvider != null) { + try { + cassandraCqlProvider.dropTableIfExists(cassandraConfig.getKeyspace(), tableName); + } catch (Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", tableName, e); + } + cassandraCqlProvider.close(); + } + } + } + + @Override + public AirbyteMessageConsumer getConsumer(JsonNode config, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + return new CassandraMessageConsumer(new CassandraConfig(config), configuredCatalog, outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java new file mode 100644 index 0000000000000..5eb6f8b9f0036 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraMessageConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraMessageConsumer.class); + + private final CassandraConfig cassandraConfig; + + private final Consumer outputRecordCollector; + + private final Map cassandraStreams; + + private final CassandraCqlProvider cassandraCqlProvider; + + private AirbyteMessage lastMessage = null; + + public CassandraMessageConsumer(CassandraConfig cassandraConfig, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + this.cassandraConfig = cassandraConfig; + this.outputRecordCollector = outputRecordCollector; + this.cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + var nameTransformer = new CassandraNameTransformer(cassandraConfig); + this.cassandraStreams = configuredCatalog.getStreams().stream() + .collect(Collectors.toUnmodifiableMap( + AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, + k -> new CassandraStreamConfig( + nameTransformer.outputKeyspace(k.getStream().getNamespace()), + nameTransformer.outputTable(k.getStream().getName()), + nameTransformer.outputTmpTable(k.getStream().getName()), + k.getDestinationSyncMode()))); + } + + @Override + protected void startTracked() { + cassandraStreams.forEach((k, v) -> { + cassandraCqlProvider.createKeySpaceIfNotExists(v.getKeyspace(), cassandraConfig.getReplication()); + cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTempTableName()); + }); + } + + @Override + protected void acceptTracked(AirbyteMessage message) { + if (message.getType() == AirbyteMessage.Type.RECORD) { + var messageRecord = message.getRecord(); + var streamConfig = + cassandraStreams.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord)); + if (streamConfig == null) { + throw new IllegalArgumentException("Unrecognized destination stream"); + } + var data = Jsons.serialize(messageRecord.getData()); + cassandraCqlProvider.insert(streamConfig.getKeyspace(), streamConfig.getTempTableName(), data); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + this.lastMessage = message; + } else { + LOGGER.warn("Unsupported airbyte message type: {}", message.getType()); + } + } + + @Override + protected void close(boolean hasFailed) { + if (!hasFailed) { + cassandraStreams.forEach((k, v) -> { + try { + cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTableName()); + switch (v.getDestinationSyncMode()) { + case APPEND -> { + cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + case OVERWRITE -> { + cassandraCqlProvider.truncate(v.getKeyspace(), v.getTableName()); + cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + default -> throw new UnsupportedOperationException(); + } + } catch (Exception e) { + LOGGER.error("Error while copying data to table {}: : ", v.getTableName(), e); + } + }); + outputRecordCollector.accept(lastMessage); + } + + 
cassandraStreams.forEach((k, v) -> { + try { + cassandraCqlProvider.dropTableIfExists(v.getKeyspace(), v.getTempTableName()); + } catch (Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", v.getTempTableName(), e); + } + }); + cassandraCqlProvider.close(); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java new file mode 100644 index 0000000000000..791f6bd50de8c --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.google.common.base.CharMatcher; +import io.airbyte.commons.text.Names; +import io.airbyte.integrations.destination.StandardNameTransformer; + +class CassandraNameTransformer extends StandardNameTransformer { + + private final CassandraConfig cassandraConfig; + + public CassandraNameTransformer(CassandraConfig cassandraConfig) { + this.cassandraConfig = cassandraConfig; + } + + String outputKeyspace(String namespace) { + if (namespace == null || namespace.isBlank()) { + return cassandraConfig.getKeyspace(); + } + return CharMatcher.is('_').trimLeadingFrom(Names.toAlphanumericAndUnderscore(namespace)); + } + + String outputTable(String streamName) { + var tableName = super.getRawTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a cassandra table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputTmpTable(String streamName) { + var tableName = super.getTmpTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a cassandra table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputColumn(String columnName) { + return Names.doubleQuote(columnName.toLowerCase()); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java new file mode 100644 index 0000000000000..b30ef015d8378 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import java.time.Instant; +import java.util.UUID; + +class CassandraRecord { + + private final UUID id; + + private final String data; + + private final Instant timestamp; + + public CassandraRecord(UUID id, String data, Instant timestamp) { + this.id = id; + this.data = data; + this.timestamp = timestamp; + } + + static CassandraRecord of(UUID id, String data, Instant timestamp) { + return new CassandraRecord(id, data, timestamp); + } + + public UUID getId() { + return id; + } + + public String getData() { + return data; + } + + public Instant getTimestamp() { + return timestamp; + } + + @Override + public String toString() { + return "CassandraRecord{" + + "id=" + id + + ", data='" + data + '\'' + + ", timestamp=" + timestamp + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java new file mode 100644 index 0000000000000..88a6334227985 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.protocol.models.DestinationSyncMode; + +/* + * Immutable configuration class for storing destination stream config. + */ +class CassandraStreamConfig { + + private final String keyspace; + + private final String tableName; + + private final String tempTableName; + + private final DestinationSyncMode destinationSyncMode; + + public CassandraStreamConfig(String keyspace, + String tableName, + String tempTableName, + DestinationSyncMode destinationSyncMode) { + this.keyspace = keyspace; + this.tableName = tableName; + this.tempTableName = tempTableName; + this.destinationSyncMode = destinationSyncMode; + } + + public String getKeyspace() { + return keyspace; + } + + public String getTableName() { + return tableName; + } + + public String getTempTableName() { + return tempTableName; + } + + public DestinationSyncMode getDestinationSyncMode() { + return destinationSyncMode; + } + + @Override + public String toString() { + return "CassandraStreamConfig{" + + "keyspace='" + keyspace + '\'' + + ", tableName='" + tableName + '\'' + + ", tempTableName='" + tempTableName + '\'' + + ", destinationSyncMode=" + destinationSyncMode + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java new file mode 100644 index 0000000000000..43dc86f4fe83d --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.datastax.oss.driver.api.core.CqlSession; +import java.net.InetSocketAddress; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +class SessionManager { + + // AtomicInteger is used for convenience, this class is not thread safe + // and needs additional synchronization for that. + private static final ConcurrentHashMap> sessions; + + static { + sessions = new ConcurrentHashMap<>(); + } + + private SessionManager() { + + } + + /* + * CqlSession objects are heavyweight and can hold several tcp connections to the Cassandra cluster, + * for that reason it is better if sessions are reused per configuration. Sessions are thread-safe + * and can be accessed from different threads. + * + */ + public static CqlSession initSession(CassandraConfig cassandraConfig) { + var cachedSession = sessions.get(cassandraConfig); + if (cachedSession != null) { + cachedSession.value2().incrementAndGet(); + return cachedSession.value1(); + } else { + var session = CqlSession.builder() + .withLocalDatacenter(cassandraConfig.getDatacenter()) + .addContactPoint(new InetSocketAddress(cassandraConfig.getAddress(), cassandraConfig.getPort())) + .withAuthCredentials(cassandraConfig.getUsername(), cassandraConfig.getPassword()) + .build(); + sessions.put(cassandraConfig, Tuple.of(session, new AtomicInteger(1))); + return session; + } + } + + /* + * Close session configured with cassandra config. if the session is being used by more than one + * external instance only decrease the usage count, otherwise close the session and remove it from + * the map. + * + */ + public static void closeSession(CassandraConfig cassandraConfig) { + var cachedSession = sessions.get(cassandraConfig); + if (cachedSession == null) { + throw new IllegalStateException("No session for the provided config"); + } + int count = cachedSession.value2().decrementAndGet(); + if (count < 1) { + cachedSession.value1().close(); + sessions.remove(cassandraConfig); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java new file mode 100644 index 0000000000000..8968138a43535 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
 */ + +package io.airbyte.integrations.destination.cassandra; + +public class Tuple<V1, V2> { + + private final V1 value1; + + private final V2 value2; + + public Tuple(V1 value1, V2 value2) { + this.value1 = value1; + this.value2 = value2; + } + + public static <V1, V2> Tuple<V1, V2> of(V1 value1, V2 value2) { + return new Tuple<>(value1, value2); + } + + public V1 value1() { + return value1; + } + + public V2 value2() { + return value2; + } + + @Override + public String toString() { + return "Tuple{" + + "value1=" + value1 + + ", value2=" + value2 + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json new file mode 100644 index 0000000000000..61e3c0a7ab727 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json @@ -0,0 +1,65 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/cassandra", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Cassandra Destination Spec", + "type": "object", + "required": ["keyspace", "username", "password", "address", "port"], + "additionalProperties": true, + "properties": { + "keyspace": { + "title": "Keyspace", + "description": "Default Cassandra keyspace to create data in.", + "type": "string", + "order": 0 + }, + "username": { + "title": "Username", + "description": "Username to use to access Cassandra.", + "type": "string", + "order": 1 + }, + "password": { + "title": "Password", + "description": "Password associated with Cassandra.", + "type": "string", + "airbyte_secret": true, + "order": 2 + }, + "address": { + "title": "Address", + "description": "Address to connect to.", + "type": "string", + "examples": ["localhost,127.0.0.1"], + "order": 3 + }, + "port": { + "title": "Port", + "description": "Port of Cassandra.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 9042, + "order": 4 + }, + "datacenter": { + "title": "Datacenter", + "description": "Datacenter of the Cassandra cluster.", + "type": "string", + "default": "datacenter1", + "order": 5 + }, + "replication": { + "title": "Replication factor", + "type": "integer", + "description": "Indicates to how many nodes the data should be replicated.", + "default": 1, + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java new file mode 100644 index 0000000000000..145a8f89da976 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import org.testcontainers.containers.CassandraContainer; + +class CassandraContainerInitializr { + + private static ConfiguredCassandraContainer cassandraContainer; + + private CassandraContainerInitializr() { + + } + + public static ConfiguredCassandraContainer initContainer() { + if (cassandraContainer == null) { + cassandraContainer = new ConfiguredCassandraContainer(); + } + cassandraContainer.start(); + return cassandraContainer; + } + + public static class ConfiguredCassandraContainer extends CassandraContainer { + + ConfiguredCassandraContainer() { + // latest compatible version with the internal testcontainers datastax driver. + super("cassandra:3.11.11"); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java new file mode 100644 index 0000000000000..b5e38367510d9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraCqlProviderIT { + + private static final String CASSANDRA_KEYSPACE = "cassandra_keyspace"; + + private static final String CASSANDRA_TABLE = "cassandra_table"; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer nameTransformer; + + @BeforeAll + void setup() { + var cassandraContainer = CassandraContainerInitializr.initContainer(); + var cassandraConfig = TestDataFactory.createCassandraConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + this.cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + this.nameTransformer = new CassandraNameTransformer(cassandraConfig); + cassandraCqlProvider.createKeySpaceIfNotExists(CASSANDRA_KEYSPACE, 1); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + } + + @AfterEach + void clean() { + cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + } + + @Test + void testCreateKeySpaceIfNotExists() { + String keyspace = nameTransformer.outputKeyspace("test_keyspace"); + assertDoesNotThrow(() -> cassandraCqlProvider.createKeySpaceIfNotExists(keyspace, 1)); + } + + @Test + void testCreateTableIfNotExists() { + String table = nameTransformer.outputTable("test_stream"); + assertDoesNotThrow(() -> cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table)); + } + + @Test + void testInsert() { + // given + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}"); + 
cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}"); + + // when + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")); + + } + + @Test + void testTruncate() { + // given + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}"); + + // when + cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .isEmpty(); + } + + @Test + void testDropTableIfExists() { + // given + String table = nameTransformer.outputTmpTable("test_stream"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table); + + // when + cassandraCqlProvider.dropTableIfExists(CASSANDRA_KEYSPACE, table); + + // then + assertThrows(InvalidQueryException.class, () -> cassandraCqlProvider.select(CASSANDRA_KEYSPACE, table)); + } + + @Test + void testCopy() { + // given + String tmpTable = nameTransformer.outputTmpTable("test_stream_copy"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, tmpTable); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data3\"}"); + + String rawTable = nameTransformer.outputTable("test_stream_copy"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, rawTable); + + // when + cassandraCqlProvider.copy(CASSANDRA_KEYSPACE, tmpTable, rawTable); + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, rawTable); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..83e7846c1ef2f --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CassandraDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraDestinationAcceptanceTest.class); + + private JsonNode configJson; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer cassandraNameTransformer; + + private static CassandraContainerInitializr.ConfiguredCassandraContainer cassandraContainer; + + @BeforeAll + static void initContainer() { + cassandraContainer = CassandraContainerInitializr.initContainer(); + } + + @Override + protected void setup(TestDestinationEnv testEnv) { + configJson = TestDataFactory.createJsonConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + var cassandraConfig = new CassandraConfig(configJson); + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + cassandraCqlProvider.retrieveMetadata().forEach(meta -> { + var keyspace = meta.value1(); + meta.value2().forEach(table -> cassandraCqlProvider.truncate(keyspace, table)); + }); + } + + @Override + protected String getImageName() { + return "airbyte/destination-cassandra:dev"; + } + + @Override + protected JsonNode getConfig() { + return configJson; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected JsonNode getFailCheckConfig() { + return TestDataFactory.createJsonConfig( + "usr", + "pw", + "127.0.192.1", + 8080); + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) { + var keyspace = cassandraNameTransformer.outputKeyspace(namespace); + var table = cassandraNameTransformer.outputTable(streamName); + return cassandraCqlProvider.select(keyspace, table).stream() + .sorted(Comparator.comparing(CassandraRecord::getTimestamp)) + .map(CassandraRecord::getData) + .map(Jsons::deserialize) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java new file mode 100644 index 0000000000000..715900d9555bf --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.integrations.destination.cassandra.CassandraContainerInitializr.ConfiguredCassandraContainer; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraDestinationIT { + + private CassandraDestination cassandraDestination; + + private ConfiguredCassandraContainer cassandraContainer; + + @BeforeAll + void setup() { + this.cassandraContainer = CassandraContainerInitializr.initContainer(); + this.cassandraDestination = new CassandraDestination(); + } + + @Test + void testCheckWithStatusSucceeded() { + + var jsonConfiguration = TestDataFactory.createJsonConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + + var connectionStatus = cassandraDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED); + } + + @Test + void testCheckWithStatusFailed() { + + var jsonConfiguration = TestDataFactory.createJsonConfig( + "usr", + "pw", + "192.0.2.1", + 8080); + + var connectionStatus = cassandraDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java new file mode 100644 index 0000000000000..6e065affcde86 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
 */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.util.function.Function; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +@TestMethodOrder(OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraMessageConsumerIT { + + private static final String AIRBYTE_NAMESPACE_1 = "airbyte_namespace_1"; + private static final String AIRBYTE_NAMESPACE_2 = "airbyte_namespace_2"; + + private static final String AIRBYTE_STREAM_1 = "airbyte_stream_1"; + private static final String AIRBYTE_STREAM_2 = "airbyte_stream_2"; + + private CassandraMessageConsumer cassandraMessageConsumer; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer nameTransformer; + + @BeforeAll + void setup() { + var cassandraContainer = CassandraContainerInitializr.initContainer(); + var cassandraConfig = TestDataFactory.createCassandraConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + + var stream1 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1); + var stream2 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2); + + var cStream1 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.APPEND, stream1); + var cStream2 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.OVERWRITE, stream2); + + var catalog = TestDataFactory.createConfiguredAirbyteCatalog(cStream1, cStream2); + + cassandraMessageConsumer = new CassandraMessageConsumer(cassandraConfig, catalog, message -> {}); + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + nameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Test + @Order(1) + void testStartTracked() { + + assertDoesNotThrow(() -> cassandraMessageConsumer.startTracked()); + + } + + @Test + @Order(2) + void testAcceptTracked() { + + Function<String, JsonNode> function = + data -> Jsons.jsonNode(ImmutableMap.builder().put("property", data).build()); + + assertDoesNotThrow(() -> { + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1, + function.apply("data1"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1, + function.apply("data2"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data3"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data4"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.STATE, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, +
function.apply("data5"))); + }); + + } + + @Test + @Order(3) + void testClose() { + + assertDoesNotThrow(() -> cassandraMessageConsumer.close(false)); + + } + + @Test + @Order(4) + void testFinalState() { + + var keyspace1 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_1); + var keyspace2 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_2); + var table1 = nameTransformer.outputTable(AIRBYTE_STREAM_1); + var table2 = nameTransformer.outputTable(AIRBYTE_STREAM_2); + + var resultSet1 = cassandraCqlProvider.select(keyspace1, table1); + var resultSet2 = cassandraCqlProvider.select(keyspace2, table2); + + assertThat(resultSet1) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")); + + assertThat(resultSet2) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data4\"}")); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java new file mode 100644 index 0000000000000..b460b6963314a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.time.Instant; +import java.util.List; + +public class TestDataFactory { + + private TestDataFactory() { + + } + + static CassandraConfig createCassandraConfig(String username, String password, String address, int port) { + return new CassandraConfig( + "default_keyspace", + username, + password, + address, + port, + "datacenter1", + 1); + } + + static JsonNode createJsonConfig(String username, String password, String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", username) + .put("password", password) + .put("address", address) + .put("port", port) + .put("datacenter", "datacenter1") + .put("replication", 1) + .build()); + } + + static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type, + String streamName, + String namespace, + JsonNode data) { + return new AirbyteMessage() + .withType(type) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withData(data) + .withEmittedAt(Instant.now().toEpochMilli())); + } + + static AirbyteStream createAirbyteStream(String name, String namespace) { + return new AirbyteStream() + .withName(name) + .withNamespace(namespace); + } + + static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) { + return new ConfiguredAirbyteStream() + 
.withDestinationSyncMode(syncMode) + .withStream(stream); + } + + static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... configuredStreams) { + return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams)); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java new file mode 100644 index 0000000000000..a7249d2814a9a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class CassandraConfigTest { + + private CassandraConfig cassandraConfig; + + @BeforeEach + void setup() { + var jsonNode = TestDataFactory.createJsonConfig( + "usr", + "pw", + "127.0.0.1", + 9042); + this.cassandraConfig = new CassandraConfig(jsonNode); + } + + @Test + void testConfig() { + + assertThat(cassandraConfig) + .hasFieldOrPropertyWithValue("keyspace", "default_keyspace") + .hasFieldOrPropertyWithValue("username", "usr") + .hasFieldOrPropertyWithValue("password", "pw") + .hasFieldOrPropertyWithValue("address", "127.0.0.1") + .hasFieldOrPropertyWithValue("port", 9042) + .hasFieldOrPropertyWithValue("datacenter", "datacenter1") + .hasFieldOrPropertyWithValue("replication", 1); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java new file mode 100644 index 0000000000000..b456ace632f1c --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraNameTransformerTest { + + private CassandraNameTransformer cassandraNameTransformer; + + @BeforeAll + void setup() { + var cassandraConfig = TestDataFactory.createCassandraConfig( + "usr", + "pw", + "127.0.0.1", + 9042); + this.cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Test + void testOutputTable() { + + var table = cassandraNameTransformer.outputTable("stream_name"); + + assertThat(table).matches("airbyte_raw_stream_name"); + + } + + @Test + void testOutputTmpTable() { + + var table = cassandraNameTransformer.outputTmpTable("stream_name"); + + assertThat(table).matches("airbyte_tmp_+[a-z]+_stream_name"); + + } + + @Test + void testOutputKeyspace() { + + var keyspace = cassandraNameTransformer.outputKeyspace("***keyspace^h"); + + assertThat(keyspace).matches("keyspace_h"); + + } + + @Test + void outputColumn() { + + var column = cassandraNameTransformer.outputColumn("_airbyte_data"); + + assertThat(column).matches("\"_airbyte_data\""); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java new file mode 100644 index 0000000000000..b460b6963314a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.time.Instant; +import java.util.List; + +public class TestDataFactory { + + private TestDataFactory() { + + } + + static CassandraConfig createCassandraConfig(String username, String password, String address, int port) { + return new CassandraConfig( + "default_keyspace", + username, + password, + address, + port, + "datacenter1", + 1); + } + + static JsonNode createJsonConfig(String username, String password, String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", username) + .put("password", password) + .put("address", address) + .put("port", port) + .put("datacenter", "datacenter1") + .put("replication", 1) + .build()); + } + + static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type, + String streamName, + String namespace, + JsonNode data) { + return new AirbyteMessage() + .withType(type) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withData(data) + .withEmittedAt(Instant.now().toEpochMilli())); + } + + static AirbyteStream createAirbyteStream(String name, String namespace) { + return new AirbyteStream() + .withName(name) + .withNamespace(namespace); + } + + static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) { + return new ConfiguredAirbyteStream() + .withDestinationSyncMode(syncMode) + .withStream(stream); + } + + static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... 
configuredStreams) { + return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams)); + } + +} diff --git a/airbyte-integrations/connectors/destination-databricks/Dockerfile b/airbyte-integrations/connectors/destination-databricks/Dockerfile index 6f09d59bf9b5b..1ef415915e481 100644 --- a/airbyte-integrations/connectors/destination-databricks/Dockerfile +++ b/airbyte-integrations/connectors/destination-databricks/Dockerfile @@ -7,5 +7,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-databricks diff --git a/airbyte-integrations/connectors/destination-databricks/build.gradle b/airbyte-integrations/connectors/destination-databricks/build.gradle index 24f6b9a9f062c..a685f9655a40c 100644 --- a/airbyte-integrations/connectors/destination-databricks/build.gradle +++ b/airbyte-integrations/connectors/destination-databricks/build.gradle @@ -24,7 +24,11 @@ dependencies { implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0' implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '3.3.0' implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0' - implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10' + implementation('tech.allegro.schema.json2avro:converter') { + version { + branch = 'master' + } + } integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-databricks') diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java index f72c10ad6177c..3f58adbdc4637 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java @@ -90,7 +90,7 @@ public DatabricksStreamCopier(final String stagingFolder, s3Config.getBucketName(), s3Config.getBucketPath(), databricksConfig.getDatabaseSchema(), streamName); LOGGER.info("[Stream {}] Database schema: {}", streamName, schemaName); - LOGGER.info("[Stream {}] Parquet schema: {}", streamName, parquetWriter.getParquetSchema()); + LOGGER.info("[Stream {}] Parquet schema: {}", streamName, parquetWriter.getSchema()); LOGGER.info("[Stream {}] Tmp table {} location: {}", streamName, tmpTableName, tmpTableLocation); LOGGER.info("[Stream {}] Data table {} location: {}", streamName, destTableName, destTableLocation); diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json index 14e72127fe02a..4112fb86fd89e 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json @@ -84,7 +84,7 @@ "examples": ["airbyte.staging"] }, "s3_bucket_path": { - "Title": "S3 Bucket Path", + "title": "S3 Bucket Path", "type": 
"string", "description": "The directory under the S3 bucket where data will be written.", "examples": ["data_sync/test"] diff --git a/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java index 4fc11cf3d7d07..1bdda60327006 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java @@ -18,6 +18,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.copy.StreamCopierFactory; @@ -31,8 +32,6 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +40,6 @@ public class DatabricksDestinationAcceptanceTest extends DestinationAcceptanceTe private static final Logger LOGGER = LoggerFactory.getLogger(DatabricksDestinationAcceptanceTest.class); private static final String SECRETS_CONFIG_JSON = "secrets/config.json"; - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); private final ExtendedNameTransformer nameTransformer = new DatabricksNameTransformer(); private JsonNode configJson; @@ -85,7 +83,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, .orderBy(field(JavaBaseConstants.COLUMN_NAME_EMITTED_AT).asc()) .fetch().stream() .map(record -> { - final JsonNode json = Jsons.deserialize(record.formatJSON(JSON_FORMAT)); + final JsonNode json = Jsons.deserialize(record.formatJSON(JdbcUtils.getDefaultJSONFormat())); final JsonNode jsonWithOriginalFields = nameUpdater.getJsonWithOriginalFieldNames(json); return AvroRecordHelper.pruneAirbyteJson(jsonWithOriginalFields); }) diff --git a/airbyte-integrations/connectors/destination-gcs/Dockerfile b/airbyte-integrations/connectors/destination-gcs/Dockerfile index f4141aa02fb79..af9f3aadbad4e 100644 --- a/airbyte-integrations/connectors/destination-gcs/Dockerfile +++ b/airbyte-integrations/connectors/destination-gcs/Dockerfile @@ -7,5 +7,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/destination-gcs diff --git a/airbyte-integrations/connectors/destination-gcs/build.gradle b/airbyte-integrations/connectors/destination-gcs/build.gradle index bf27f8686edc6..36c46d80ffc5b 100644 --- a/airbyte-integrations/connectors/destination-gcs/build.gradle +++ b/airbyte-integrations/connectors/destination-gcs/build.gradle @@ -30,7 +30,11 @@ dependencies { implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0' implementation group: 'org.apache.hadoop', name: 
'hadoop-mapreduce-client-core', version: '3.3.0' implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0' - implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10' + implementation('tech.allegro.schema.json2avro:converter') { + version { + branch = 'master' + } + } testImplementation 'org.apache.commons:commons-lang3:3.11' diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java index f8b4dcaa1065d..49c87bd72e5d9 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java @@ -11,7 +11,6 @@ import io.airbyte.integrations.destination.gcs.writer.BaseGcsWriter; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.avro.S3AvroFormatConfig; import io.airbyte.integrations.destination.s3.util.S3StreamTransferManagerHelper; import io.airbyte.integrations.destination.s3.writer.S3Writer; @@ -27,6 +26,7 @@ import org.apache.avro.generic.GenericDatumWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class GcsAvroWriter extends BaseGcsWriter implements S3Writer { @@ -42,7 +42,7 @@ public GcsAvroWriter(final GcsDestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws IOException { super(config, s3Client, configuredStream); @@ -52,7 +52,7 @@ public GcsAvroWriter(final GcsDestinationConfig config, LOGGER.info("Full GCS path for stream '{}': {}/{}", stream.getName(), config.getBucketName(), objectKey); - this.avroRecordFactory = new AvroRecordFactory(schema, nameUpdater); + this.avroRecordFactory = new AvroRecordFactory(schema, converter); this.uploadManager = S3StreamTransferManagerHelper.getDefault( config.getBucketName(), objectKey, s3Client, config.getFormatConfig().getPartSize()); // We only need one output stream as we only have one input stream. This is reasonably performant. 
diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java index 536b014e93392..e72e3613108d3 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java @@ -5,16 +5,13 @@ package io.airbyte.integrations.destination.gcs.parquet; import com.amazonaws.services.s3.AmazonS3; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.databind.node.ObjectNode; -import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.gcs.GcsDestinationConfig; import io.airbyte.integrations.destination.gcs.credential.GcsHmacKeyCredentialConfig; import io.airbyte.integrations.destination.gcs.writer.BaseGcsWriter; import io.airbyte.integrations.destination.s3.S3Format; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; +import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; import io.airbyte.integrations.destination.s3.parquet.S3ParquetFormatConfig; import io.airbyte.integrations.destination.s3.writer.S3Writer; import io.airbyte.protocol.models.AirbyteRecordMessage; @@ -42,21 +39,17 @@ public class GcsParquetWriter extends BaseGcsWriter implements S3Writer { private static final ObjectMapper MAPPER = new ObjectMapper(); private static final ObjectWriter WRITER = MAPPER.writer(); - private final Schema schema; - private final JsonFieldNameUpdater nameUpdater; private final ParquetWriter parquetWriter; - private final JsonAvroConverter converter = new JsonAvroConverter(); + private final AvroRecordFactory avroRecordFactory; public GcsParquetWriter(final GcsDestinationConfig config, final AmazonS3 s3Client, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws URISyntaxException, IOException { super(config, s3Client, configuredStream); - this.schema = schema; - this.nameUpdater = nameUpdater; final String outputFilename = BaseGcsWriter.getOutputFilename(uploadTimestamp, S3Format.PARQUET); final String objectKey = String.join("/", outputPrefix, outputFilename); @@ -78,6 +71,7 @@ public GcsParquetWriter(final GcsDestinationConfig config, .withDictionaryPageSize(formatConfig.getDictionaryPageSize()) .withDictionaryEncoding(formatConfig.isDictionaryEncoding()) .build(); + this.avroRecordFactory = new AvroRecordFactory(schema, converter); } public static Configuration getHadoopConfig(final GcsDestinationConfig config) { @@ -99,16 +93,7 @@ public static Configuration getHadoopConfig(final GcsDestinationConfig config) { @Override public void write(final UUID id, final AirbyteRecordMessage recordMessage) throws IOException { - JsonNode inputData = recordMessage.getData(); - inputData = nameUpdater.getJsonWithStandardizedFieldNames(inputData); - - final ObjectNode jsonRecord = MAPPER.createObjectNode(); - jsonRecord.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString()); - jsonRecord.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, 
recordMessage.getEmittedAt()); - jsonRecord.setAll((ObjectNode) inputData); - - final GenericData.Record avroRecord = converter.convertToGenericDataRecord(WRITER.writeValueAsBytes(jsonRecord), schema); - parquetWriter.write(avroRecord); + parquetWriter.write(avroRecordFactory.getAvroRecord(id, recordMessage)); } @Override diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java index bf76b36d78513..45d1e334d2dcb 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java @@ -11,7 +11,7 @@ import io.airbyte.integrations.destination.gcs.jsonl.GcsJsonlWriter; import io.airbyte.integrations.destination.gcs.parquet.GcsParquetWriter; import io.airbyte.integrations.destination.s3.S3Format; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonToAvroSchemaConverter; import io.airbyte.integrations.destination.s3.writer.S3Writer; import io.airbyte.protocol.models.AirbyteStream; @@ -35,20 +35,17 @@ public S3Writer create(final GcsDestinationConfig config, if (format == S3Format.AVRO || format == S3Format.PARQUET) { final AirbyteStream stream = configuredStream.getStream(); + LOGGER.info("Json schema for stream {}: {}", stream.getName(), stream.getJsonSchema()); final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace(), true); - final JsonFieldNameUpdater nameUpdater = new JsonFieldNameUpdater(schemaConverter.getStandardizedNames()); - LOGGER.info("Paquet schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); - if (nameUpdater.hasNameUpdate()) { - LOGGER.info("The following field names will be standardized: {}", nameUpdater); - } + LOGGER.info("Avro schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); if (format == S3Format.AVRO) { - return new GcsAvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new GcsAvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } else { - return new GcsParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new GcsParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } } diff --git a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java index d8ead36743695..a62a4e7f0b2c0 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.ObjectReader; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; import java.util.LinkedList; @@ -19,12 +20,9 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericDatumReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class GcsAvroDestinationAcceptanceTest extends GcsDestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected GcsAvroDestinationAcceptanceTest() { super(S3Format.AVRO); } @@ -56,7 +54,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); while (dataFileReader.hasNext()) { final GenericData.Record record = dataFileReader.next(); - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java index 5072da17dd29f..13bc2ec2c3791 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java @@ -4,8 +4,6 @@ package io.airbyte.integrations.destination.gcs; -import static io.airbyte.integrations.destination.s3.S3DestinationConstants.NAME_TRANSFORMER; - import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; import com.amazonaws.services.s3.model.S3ObjectSummary; @@ -15,6 +13,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.S3DestinationConstants; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.S3FormatConfig; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; @@ -89,7 +88,7 @@ protected List getAllSyncedObjects(final String streamName, fin .listObjects(config.getBucketName(), outputPrefix) .getObjectSummaries() .stream() - .filter(o -> o.getKey().contains(NAME_TRANSFORMER.convertStreamName(streamName) + "/")) + .filter(o -> o.getKey().contains(S3DestinationConstants.NAME_TRANSFORMER.convertStreamName(streamName) + "/")) .sorted(Comparator.comparingLong(o -> o.getLastModified().getTime())) .collect(Collectors.toList()); LOGGER.info( diff --git 
a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java index f04c3bb3b3fb3..6db884528f298 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.destination.gcs.parquet.GcsParquetWriter; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; import java.io.IOException; @@ -22,12 +23,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class GcsParquetDestinationAcceptanceTest extends GcsDestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected GcsParquetDestinationAcceptanceTest() { super(S3Format.PARQUET); } @@ -63,7 +61,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); GenericData.Record record; while ((record = parquetReader.read()) != null) { - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java index d8a4681fa43ce..a49b1664be4f6 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -15,14 +16,10 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; public class JdbcDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new 
JSONFormat().recordFormat(RecordFormat.OBJECT); - private PostgreSQLContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @@ -99,7 +96,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java index 1cc76ff2ae683..ace31323c4a6d 100644 --- a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java @@ -14,6 +14,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshHelpers; import io.airbyte.integrations.destination.ExtendedNameTransformer; @@ -23,7 +24,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -31,8 +31,6 @@ public class MssqlStrictEncryptDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); - private static MSSQLServerContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private JsonNode config; @@ -130,7 +128,7 @@ private List retrieveRecordsFromTable(final String tableName, final St return ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); }); diff --git a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java index 8b3222e3fb0fd..2376a434c22a4 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; 
import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -18,16 +19,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.testcontainers.containers.MSSQLServerContainer; public class MSSQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private static MSSQLServerContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private JsonNode configWithoutDbName; @@ -120,7 +117,7 @@ private List retrieveRecordsFromTable(final String tableName, final St return ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); }); diff --git a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java index bb6af0a0b4554..8717a8c7f6c7f 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java +++ b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java @@ -11,6 +11,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -18,8 +19,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.testcontainers.containers.MSSQLServerContainer; @@ -27,8 +26,6 @@ public class MSSQLDestinationAcceptanceTestSSL extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private static MSSQLServerContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private JsonNode configWithoutDbName; @@ -129,7 +126,7 @@ private List retrieveRecordsFromTable(final String tableName, final St return ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); }); diff --git 
a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java index 805105394fa58..2f1e4ab42afda 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshBastionContainer; import io.airbyte.integrations.base.ssh.SshTunnel; @@ -21,8 +22,6 @@ import java.util.Objects; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.JdbcDatabaseContainer; import org.testcontainers.containers.MSSQLServerContainer; import org.testcontainers.containers.Network; @@ -33,8 +32,6 @@ */ public abstract class SshMSSQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private final String schemaName = RandomStringUtils.randomAlphabetic(8).toLowerCase(); @@ -148,7 +145,7 @@ private List retrieveRecordsFromTable(final String tableName, final St database, schema, tableName.toLowerCase(), JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()))); } diff --git a/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java index a4796b527bb3d..0c29d20a296b8 100644 --- a/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -26,16 +27,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.junit.jupiter.api.Test; import 
org.testcontainers.containers.MySQLContainer; public class MySQLStrictEncryptDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private MySQLContainer db; private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); @@ -115,7 +112,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java index 6b4ed5de48f74..a9f491059c558 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -26,16 +27,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MySQLContainer; public class MySQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private MySQLContainer db; private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); @@ -117,7 +114,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java index c668151ee2c12..409737eaf9deb 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java @@ -13,6 +13,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; 
import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshTunnel; import io.airbyte.integrations.destination.ExtendedNameTransformer; @@ -22,7 +23,6 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; /** * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file @@ -30,8 +30,6 @@ */ public abstract class SshMySQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); private String schemaName; @@ -131,7 +129,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName.toLowerCase(), JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()))); } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java index fcadc2909ddf9..b60ac9b2950b5 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java @@ -8,21 +8,18 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import java.sql.SQLException; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MySQLContainer; public class SslMySQLDestinationAcceptanceTest extends MySQLDestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private MySQLContainer db; private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); @@ -108,7 +105,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java index 
233734419aafc..1646e2f1dc0e8 100644 --- a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshBastionContainer; import io.airbyte.integrations.base.ssh.SshTunnel; @@ -21,13 +22,10 @@ import java.util.List; import java.util.Objects; import java.util.stream.Collectors; -import org.jooq.JSONFormat; import org.testcontainers.containers.Network; public abstract class SshOracleDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new OracleNameTransformer(); private final String schemaName = "TEST_ORCL"; @@ -116,7 +114,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, OracleDestination.COLUMN_NAME_EMITTED_AT))) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java index dd3612ea72909..8e57e31ef7ffc 100644 --- a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java @@ -14,20 +14,17 @@ import io.airbyte.db.Database; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.Test; public class UnencryptedOracleDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new OracleNameTransformer(); private static OracleContainer db; private static JsonNode config; @@ -120,7 +117,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .collect(Collectors.toList())); return result .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); } diff --git 
a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java index 9e1be4d78e0bf..ce71fbf7ec97c 100644 --- a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -15,16 +16,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.utility.DockerImageName; // todo (cgardens) - DRY this up with PostgresDestinationAcceptanceTest public class PostgresDestinationStrictEncryptAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private PostgreSQLContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @@ -117,7 +114,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java index aabac7d93f6ac..17463b521acc2 100644 --- a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -15,14 +16,10 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import 
org.testcontainers.containers.PostgreSQLContainer; public class PostgresDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private PostgreSQLContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @@ -116,7 +113,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java index 762fb9aef760e..4ee8d0ed0f79c 100644 --- a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java @@ -10,6 +10,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshBastionContainer; import io.airbyte.integrations.base.ssh.SshTunnel; @@ -19,8 +20,6 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; // todo (cgardens) - likely some of this could be further de-duplicated with @@ -32,8 +31,6 @@ */ public abstract class SshPostgresDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private static final String schemaName = RandomStringUtils.randomAlphabetic(8).toLowerCase(); private static PostgreSQLContainer db; @@ -130,7 +127,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()))); } diff --git a/airbyte-integrations/connectors/destination-pulsar/.dockerignore b/airbyte-integrations/connectors/destination-pulsar/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-pulsar/Dockerfile b/airbyte-integrations/connectors/destination-pulsar/Dockerfile new file mode 100644 index 0000000000000..c5ffa3415f27f --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-pulsar/Dockerfile @@ -0,0 +1,12 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte + +ENV APPLICATION destination-pulsar + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-pulsar diff --git a/airbyte-integrations/connectors/destination-pulsar/README.md b/airbyte-integrations/connectors/destination-pulsar/README.md new file mode 100644 index 0000000000000..a291e2c6680d5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/README.md @@ -0,0 +1,68 @@ +# Destination Pulsar + +This is the repository for the Pulsar destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/pulsar). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-pulsar:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-pulsar:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-pulsar:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-pulsar:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/pulsar`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/PulsarDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. 
Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/destination-pulsar/build.gradle b/airbyte-integrations/connectors/destination-pulsar/build.gradle new file mode 100644 index 0000000000000..835f9dfaaa9e4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/build.gradle @@ -0,0 +1,25 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.pulsar.PulsarDestination' + applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:bases:base-java') + + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + implementation 'org.apache.pulsar:pulsar-client:2.8.1' + + testImplementation "org.testcontainers:pulsar:1.16.2" + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-pulsar') +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java new file mode 100644 index 0000000000000..5b00b99d34f88 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.StandardNameTransformer; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.apache.pulsar.client.api.MessageId; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PulsarDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(PulsarDestination.class); + + public static final String COLUMN_NAME_AB_ID = JavaBaseConstants.COLUMN_NAME_AB_ID; + public static final String COLUMN_NAME_EMITTED_AT = JavaBaseConstants.COLUMN_NAME_EMITTED_AT; + public static final String COLUMN_NAME_DATA = JavaBaseConstants.COLUMN_NAME_DATA; + public static final String COLUMN_NAME_STREAM = "_airbyte_stream"; + + private final StandardNameTransformer namingResolver; + + public PulsarDestination() { + this.namingResolver = new StandardNameTransformer(); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + try { + final PulsarDestinationConfig pulsarConfig = PulsarDestinationConfig.getPulsarDestinationConfig(config); + final String testTopic = pulsarConfig.getTestTopic(); + if (!testTopic.isBlank()) { + final String key = UUID.randomUUID().toString(); + final GenericRecord value = Schema.generic(PulsarDestinationConfig.getSchemaInfo()) + .newRecordBuilder() + .set(PulsarDestination.COLUMN_NAME_AB_ID, key) + .set(PulsarDestination.COLUMN_NAME_STREAM, "test-topic-stream") + .set(PulsarDestination.COLUMN_NAME_EMITTED_AT, System.currentTimeMillis()) + .set(PulsarDestination.COLUMN_NAME_DATA, Jsons.jsonNode(ImmutableMap.of("test-key", "test-value"))) + .build(); + + try (final PulsarClient client = PulsarUtils.buildClient(pulsarConfig.getServiceUrl()); + final Producer producer = PulsarUtils.buildProducer(client, Schema.generic(PulsarDestinationConfig.getSchemaInfo()), + pulsarConfig.getProducerConfig(), pulsarConfig.uriForTopic(testTopic))) { + final MessageId messageId = producer.send(value); + + producer.flush(); + + LOGGER.info("Successfully sent message id '{}' to Pulsar brokers for topic '{}'.", messageId, testTopic); + } + } + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception attempting to connect to the Pulsar brokers: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect to the Pulsar brokers with provided configuration. 
\n" + e.getMessage()); + } + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + return new PulsarRecordConsumer(PulsarDestinationConfig.getPulsarDestinationConfig(config), + catalog, + outputRecordCollector, + namingResolver); + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new PulsarDestination(); + LOGGER.info("Starting destination: {}", PulsarDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("Completed destination: {}", PulsarDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java new file mode 100644 index 0000000000000..c67056a1f35b7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.pulsar.client.api.CompressionType; +import org.apache.pulsar.client.api.schema.RecordSchemaBuilder; +import org.apache.pulsar.client.api.schema.SchemaBuilder; +import org.apache.pulsar.common.schema.SchemaInfo; +import org.apache.pulsar.common.schema.SchemaType; + +public class PulsarDestinationConfig { + + private final String serviceUrl; + private final String topicPattern; + private final String topicPrefix; + private final String testTopic; + private final Map producerConfig; + private final boolean sync; + + private PulsarDestinationConfig(final JsonNode config) { + this.serviceUrl = buildServiceUrl(config); + this.topicPattern = buildTopicPattern(config); + this.topicPrefix = buildTopicPrefix(config); + this.testTopic = buildTestTopic(config); + this.producerConfig = buildProducerConfig(config); + this.sync = isSyncProducer(config); + } + + public static PulsarDestinationConfig getPulsarDestinationConfig(final JsonNode config) { + return new PulsarDestinationConfig(config); + } + + public Map getProducerConfig() { + return producerConfig; + } + + public String getServiceUrl() { + return serviceUrl; + } + + public static SchemaInfo getSchemaInfo() { + RecordSchemaBuilder recordSchemaBuilder = SchemaBuilder.record("airbyte"); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_AB_ID).type(SchemaType.STRING).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_STREAM).type(SchemaType.STRING).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_EMITTED_AT).type(SchemaType.TIMESTAMP).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_DATA).type(SchemaType.BYTES).required(); + + return recordSchemaBuilder.build(SchemaType.JSON); + } + + public String uriForTopic(final String topic) { + return topicPrefix + topic; + } + + public String getTestTopic() { + return testTopic; + } + + public String getTopicPattern() { + return topicPattern; + } + + public boolean isSync() { + return sync; + } + + private String buildServiceUrl(final JsonNode config) { + return String.format("pulsar%s://%s", + 
config.get("use_tls").asBoolean() ? "+ssl" : "", + config.get("brokers").asText()); + } + + private String buildTestTopic(final JsonNode config) { + return config.has("test_topic") ? config.get("test_topic").asText() : ""; + } + + private String buildTopicPattern(final JsonNode config) { + return config.get("topic_pattern").asText(); + } + + private String buildTopicPrefix(final JsonNode config) { + return String.format("%s://%s/%s/", + config.get("topic_type").asText(), + config.get("topic_tenant").asText(), + config.get("topic_namespace").asText()); + } + + private Map buildProducerConfig(final JsonNode config) { + final ImmutableMap.Builder conf = ImmutableMap.builder(); + if (config.has("producer_name")) { + conf.put("producerName", config.get("producer_name").asText()); + } + conf.put("compressionType", CompressionType.valueOf(config.get("compression_type").asText())); + conf.put("sendTimeoutMs", config.get("send_timeout_ms").asInt()); + conf.put("maxPendingMessages", config.get("max_pending_messages").asInt()); + conf.put("maxPendingMessagesAcrossPartitions", config.get("max_pending_messages_across_partitions").asInt()); + conf.put("batchingEnabled", config.get("batching_enabled").asBoolean()); + conf.put("batchingMaxMessages", config.get("batching_max_messages").asInt()); + conf.put("batchingMaxPublishDelayMicros", config.get("batching_max_publish_delay").asInt() * 1000); + conf.put("blockIfQueueFull", config.get("block_if_queue_full").asBoolean()); + + return conf.build(); + } + + private boolean isSyncProducer(final JsonNode config) { + return config.has("producer_sync") && config.get("producer_sync").asBoolean(); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java new file mode 100644 index 0000000000000..c22ac5c056c5b --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PulsarRecordConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(PulsarRecordConsumer.class); + + private final PulsarDestinationConfig config; + private final Map> producerMap; + private final ConfiguredAirbyteCatalog catalog; + private final Consumer outputRecordCollector; + private final NamingConventionTransformer nameTransformer; + private final PulsarClient client; + + private AirbyteMessage lastStateMessage = null; + + public PulsarRecordConsumer(final PulsarDestinationConfig pulsarDestinationConfig, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector, + final NamingConventionTransformer nameTransformer) { + this.config = pulsarDestinationConfig; + this.producerMap = new HashMap<>(); + this.catalog = catalog; + this.outputRecordCollector = outputRecordCollector; + this.nameTransformer = nameTransformer; + this.client = PulsarUtils.buildClient(this.config.getServiceUrl()); + } + + @Override + protected void startTracked() { + producerMap.putAll(buildProducerMap()); + } + + @Override + protected void acceptTracked(final AirbyteMessage airbyteMessage) { + if (airbyteMessage.getType() == AirbyteMessage.Type.STATE) { + lastStateMessage = airbyteMessage; + } else if (airbyteMessage.getType() == AirbyteMessage.Type.RECORD) { + final AirbyteRecordMessage recordMessage = airbyteMessage.getRecord(); + final Producer producer = producerMap.get(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage)); + final String key = UUID.randomUUID().toString(); + final GenericRecord value = Schema.generic(PulsarDestinationConfig.getSchemaInfo()) + .newRecordBuilder() + .set(PulsarDestination.COLUMN_NAME_AB_ID, key) + .set(PulsarDestination.COLUMN_NAME_STREAM, recordMessage.getStream()) + .set(PulsarDestination.COLUMN_NAME_EMITTED_AT, recordMessage.getEmittedAt()) + .set(PulsarDestination.COLUMN_NAME_DATA, recordMessage.getData().toString().getBytes()) + .build(); + + sendRecord(producer, value); + } else { + LOGGER.warn("Unexpected message: " + airbyteMessage.getType()); + } + } + + Map> buildProducerMap() { + return catalog.getStreams().stream() + .map(stream -> AirbyteStreamNameNamespacePair.fromAirbyteSteam(stream.getStream())) + .collect(Collectors.toMap(Function.identity(), pair -> { + String topic = nameTransformer.getIdentifier(config.getTopicPattern() + .replaceAll("\\{namespace}", Optional.ofNullable(pair.getNamespace()).orElse("")) + .replaceAll("\\{stream}", 
Optional.ofNullable(pair.getName()).orElse(""))); + return PulsarUtils.buildProducer(client, Schema.generic(PulsarDestinationConfig.getSchemaInfo()), config.getProducerConfig(), + config.uriForTopic(topic)); + }, (existing, newValue) -> existing)); + } + + private void sendRecord(final Producer producer, final GenericRecord record) { + producer.sendAsync(record); + if (config.isSync()) { + try { + producer.flush(); + } catch (PulsarClientException e) { + LOGGER.error("Error sending message to topic.", e); + throw new RuntimeException("Cannot send message to Pulsar. Error: " + e.getMessage(), e); + } + if (lastStateMessage != null) { + outputRecordCollector.accept(lastStateMessage); + } + } + } + + @Override + protected void close(final boolean hasFailed) { + producerMap.values().forEach(producer -> { + Exceptions.swallow(producer::flush); + Exceptions.swallow(producer::close); + }); + Exceptions.swallow(client::close); + + if (lastStateMessage != null) { + outputRecordCollector.accept(lastStateMessage); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java new file mode 100644 index 0000000000000..fed932ee41818 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import java.util.Map; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; + +class PulsarUtils { + + static PulsarClient buildClient(final String serviceUrl) { + try { + return PulsarClient.builder() + .serviceUrl(serviceUrl) + .build(); + } catch (PulsarClientException e) { + throw new RuntimeException("Error creating the Pulsar client", e); + } + } + + static Producer buildProducer(final PulsarClient client, + final Schema schema, + final Map config, + final String topic) { + try { + return client.newProducer(schema) + .loadConf(config) + .topic(topic) + .create(); + } catch (PulsarClientException e) { + throw new RuntimeException("Error creating the Pulsar producer", e); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json new file mode 100644 index 0000000000000..7dc40a064f490 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json @@ -0,0 +1,137 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/pulsar", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Pulsar Destination Spec", + "type": "object", + "required": [ + "brokers", + "use_tls", + "topic_type", + "topic_tenant", + "topic_namespace", + "topic_pattern", + "compression_type", + "send_timeout_ms", + "max_pending_messages", + "max_pending_messages_across_partitions", + "batching_enabled", + "batching_max_messages", + 
"batching_max_publish_delay", + "block_if_queue_full" + ], + "additionalProperties": true, + "properties": { + "brokers": { + "title": "Pulsar brokers", + "description": "A list of host/port pairs to use for establishing the initial connection to the Pulsar cluster.", + "type": "string", + "examples": ["broker1:6650,broker2:6650"] + }, + "use_tls": { + "title": "Use TLS", + "description": "Whether to use TLS encryption on the connection.", + "type": "boolean", + "default": false + }, + "topic_type": { + "title": "Topic type", + "description": "It identifies type of topic. Pulsar supports two kind of topics: persistent and non-persistent. In persistent topic, all messages are durably persisted on disk (that means on multiple disks unless the broker is standalone), whereas non-persistent topic does not persist message into storage disk.", + "type": "string", + "default": "persistent", + "enum": ["persistent", "non-persistent"] + }, + "topic_tenant": { + "title": "Topic tenant", + "description": "The topic tenant within the instance. Tenants are essential to multi-tenancy in Pulsar, and spread across clusters.", + "type": "string", + "default": "public", + "examples": ["public"] + }, + "topic_namespace": { + "title": "Topic namespace", + "description": "The administrative unit of the topic, which acts as a grouping mechanism for related topics. Most topic configuration is performed at the namespace level. Each tenant has one or multiple namespaces.", + "type": "string", + "default": "default", + "examples": ["default"] + }, + "topic_pattern": { + "title": "Topic pattern", + "description": "Topic pattern in which the records will be sent. You can use patterns like '{namespace}' and/or '{stream}' to send the message to a specific topic based on these values. Notice that the topic name will be transformed to a standard naming convention.", + "type": "string", + "examples": ["sample.topic", "{namespace}.{stream}.sample"] + }, + "topic_test": { + "title": "Test topic", + "description": "Topic to test if Airbyte can produce messages.", + "type": "string", + "examples": ["test.topic"] + }, + "producer_name": { + "title": "Producer name", + "description": "Name for the producer. 
If not filled, the system will generate a globally unique name.", + "type": "string", + "examples": ["airbyte-producer"] + }, + "producer_sync": { + "title": "Sync producer", + "description": "Wait synchronously until the record has been sent to Pulsar.", + "type": "boolean", + "default": false + }, + "compression_type": { + "title": "Compression type", + "description": "Compression type for the producer.", + "type": "string", + "default": "NONE", + "enum": ["NONE", "LZ4", "ZLIB", "ZSTD", "SNAPPY"] + }, + "send_timeout_ms": { + "title": "Message send timeout", + "description": "Send timeout in milliseconds. If a message is not acknowledged by the server before this timeout expires, an error occurs.", + "type": "integer", + "default": 30000 + }, + "max_pending_messages": { + "title": "Max pending messages", + "description": "The maximum size of a queue holding pending messages.", + "type": "integer", + "default": 1000 + }, + "max_pending_messages_across_partitions": { + "title": "Max pending messages across partitions", + "description": "The maximum number of pending messages across partitions.", + "type": "integer", + "default": 50000 + }, + "batching_enabled": { + "title": "Enable batching", + "description": "Control whether automatic batching of messages is enabled for the producer.", + "type": "boolean", + "default": true + }, + "batching_max_messages": { + "title": "Batching max messages", + "description": "Maximum number of messages permitted in a batch.", + "type": "integer", + "default": 1000 + }, + "batching_max_publish_delay": { + "title": "Batching max publish delay", + "description": "Time period in milliseconds within which sent messages will be batched.", + "type": "integer", + "default": 1 + }, + "block_if_queue_full": { + "title": "Block if queue is full", + "description": "Whether the send operation should block when the outgoing message queue is full.", + "type": "boolean", + "default": false + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..26dae59de485c --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.StandardNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.io.IOException; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.pulsar.client.api.Consumer; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.SubscriptionInitialPosition; +import org.apache.pulsar.client.api.SubscriptionType; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.testcontainers.containers.PulsarContainer; +import org.testcontainers.utility.DockerImageName; + +public class PulsarDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final String TOPIC_NAME = "test.topic"; + private static final ObjectReader READER = new ObjectMapper().reader(); + + private static PulsarContainer PULSAR; + + private final NamingConventionTransformer namingResolver = new StandardNameTransformer(); + + @Override + protected String getImageName() { + return "airbyte/destination-pulsar:dev"; + } + + @Override + protected JsonNode getConfig() throws UnknownHostException { + String brokers = Stream.concat(getIpAddresses().stream(), Stream.of("localhost")) + .map(ip -> ip + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .collect(Collectors.joining(",")); + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", brokers) + .put("use_tls", false) + .put("topic_type", "persistent") + .put("topic_tenant", "public") + .put("topic_namespace", "default") + .put("topic_pattern", "{namespace}.{stream}." + TOPIC_NAME) + .put("producer_name", "test-producer-" + UUID.randomUUID()) + .put("producer_sync", true) + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("batching_enabled", false) + .put("batching_max_messages", 1000) + .put("batching_max_publish_delay", 1) + .put("block_if_queue_full", true) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", PULSAR.getHost() + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .put("use_tls", false) + .put("topic_pattern", "{namespace}.{stream}." 
+ TOPIC_NAME) + .put("producer_sync", true) + .put("producer_name", "test-producer") + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("block_if_queue_full", true) + .build()); + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + return ""; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) + throws IOException { + return retrieveRecords(testEnv, streamName, namespace, null); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws IOException { + final PulsarClient client = PulsarClient.builder() + .serviceUrl(PULSAR.getPulsarBrokerUrl()) + .build(); + final String topic = namingResolver.getIdentifier(namespace + "." + streamName + "." + TOPIC_NAME); + final Consumer consumer = client.newConsumer(Schema.AUTO_CONSUME()) + .topic(topic) + .subscriptionName("test-subscription-" + UUID.randomUUID()) + .enableRetry(true) + .subscriptionType(SubscriptionType.Exclusive) + .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest) + .subscribe(); + + final List records = new ArrayList<>(); + while (!consumer.hasReachedEndOfTopic()) { + Message message = consumer.receive(5, TimeUnit.SECONDS); + if (message == null) { + break; + } + records.add(READER.readTree(Base64.getDecoder().decode(message.getValue().getField(PulsarDestination.COLUMN_NAME_DATA).toString()))); + Exceptions.swallow(() -> consumer.acknowledge(message)); + } + consumer.unsubscribe(); + consumer.close(); + client.close(); + + return records; + } + + @SuppressWarnings("UnstableApiUsage") + private List getIpAddresses() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .collect(Collectors.toList()); + } catch (SocketException e) { + return Collections.singletonList(InetAddress.getLocalHost().getHostAddress()); + } + } + + @Override + protected void setup(final TestDestinationEnv testEnv) { + PULSAR = new PulsarContainer(DockerImageName.parse("apachepulsar/pulsar:2.8.1")); + PULSAR.start(); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + PULSAR.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java b/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java new file mode 100644 index 0000000000000..f61c8c4d05dc7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.destination.StandardNameTransformer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; +import org.testcontainers.containers.PulsarContainer; +import org.testcontainers.utility.DockerImageName; + +@DisplayName("PulsarRecordConsumer") +public class PulsarRecordConsumerTest { + + private static final StandardNameTransformer NAMING_RESOLVER = new StandardNameTransformer(); + + private static PulsarContainer PULSAR; + + @ParameterizedTest + @ArgumentsSource(TopicMapArgumentsProvider.class) + @SuppressWarnings("unchecked") + public void testBuildProducerMap(final ConfiguredAirbyteCatalog catalog, + final String streamName, + final String namespace, + final String topicPattern, + final String expectedTopic) + throws UnknownHostException { + String brokers = Stream.concat(getIpAddresses().stream(), Stream.of("localhost")) + .map(ip -> ip + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .collect(Collectors.joining(",")); + final PulsarDestinationConfig config = PulsarDestinationConfig + .getPulsarDestinationConfig(getConfig(brokers, topicPattern)); + + final PulsarRecordConsumer recordConsumer = new PulsarRecordConsumer(config, catalog, mock(Consumer.class), NAMING_RESOLVER); + final Map> producerMap = recordConsumer.buildProducerMap(); + assertEquals(Sets.newHashSet(catalog.getStreams()).size(), producerMap.size()); + + final AirbyteStreamNameNamespacePair streamNameNamespacePair = new AirbyteStreamNameNamespacePair(streamName, namespace); + 
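+ // the producer registered for this stream should be bound to the expected fully-qualified topic URI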
assertEquals(expectedTopic, producerMap.get(streamNameNamespacePair).getTopic()); + } + + @Test + @SuppressWarnings("unchecked") + void testCannotConnectToBrokers() throws Exception { + final PulsarDestinationConfig config = PulsarDestinationConfig + .getPulsarDestinationConfig(getConfig(PULSAR.getHost() + ":" + (PULSAR.getMappedPort(PulsarContainer.BROKER_PORT) + 10), "test-topic")); + + final String streamName = "test-stream"; + final String namespace = "test-schema"; + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + streamName, + namespace, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)))); + final PulsarRecordConsumer consumer = new PulsarRecordConsumer(config, catalog, mock(Consumer.class), NAMING_RESOLVER); + final List expectedRecords = getNRecords(10, streamName, namespace); + + assertThrows(RuntimeException.class, consumer::start); + + expectedRecords.forEach(m -> assertThrows(RuntimeException.class, () -> consumer.accept(m))); + + consumer.accept(new AirbyteMessage() + .withType(AirbyteMessage.Type.STATE) + .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(namespace + "." + streamName, 0))))); + consumer.close(); + } + + private JsonNode getConfig(final String brokers, final String topic) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", brokers) + .put("use_tls", false) + .put("topic_type", "non-persistent") + .put("topic_tenant", "public") + .put("topic_namespace", "default") + .put("topic_pattern", topic) + .put("producer_sync", true) + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("batching_enabled", true) + .put("batching_max_messages", 1000) + .put("batching_max_publish_delay", 1) + .put("block_if_queue_full", true) + .build()); + } + + private List getNRecords(final int n, final String streamName, final String namespace) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); + + } + + @SuppressWarnings("UnstableApiUsage") + private List getIpAddresses() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .collect(Collectors.toList()); + } catch (SocketException e) { + return Collections.singletonList(InetAddress.getLocalHost().getHostAddress()); + } + } + + public static class TopicMapArgumentsProvider implements ArgumentsProvider { + + private static final String TOPIC_NAME = "test.topic"; + private static final String SCHEMA_NAME1 = "public"; + private static final String STREAM_NAME1 = "id_and_name"; + private static final String SCHEMA_NAME2 = SCHEMA_NAME1 + 2; + private static final String STREAM_NAME2 = STREAM_NAME1 + 2; + + private final ConfiguredAirbyteStream stream1 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME1, + STREAM_NAME1, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)); + 
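    // Illustrative sketch only -- not part of the committed test. It spells out the rule that the
    // expectations built in buildArgs below encode: resolve the "{namespace}" and "{stream}"
    // placeholders in the configured topic pattern, let the naming resolver replace illegal
    // characters (dots, dashes, spaces) with underscores, and prepend the
    // "non-persistent://public/default/" prefix.
    private static String sketchExpectedTopic(final String prefix,
                                              final String topicPattern,
                                              final AirbyteStream stream) {
      final String resolved = topicPattern
          .replace("{namespace}", stream.getNamespace())
          .replace("{stream}", stream.getName());
      return prefix + NAMING_RESOLVER.getIdentifier(resolved);
    }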
private final ConfiguredAirbyteStream stream2 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME2, + STREAM_NAME2, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)); + + @Override + public Stream provideArguments(final ExtensionContext context) { + final String prefix = "non-persistent://public/default/"; + + final List catalogs = new ArrayList<>(); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream2))); + + return catalogs.stream() + .flatMap(catalog -> catalog.getStreams().stream() + .map(stream -> buildArgs(catalog, stream.getStream(), prefix)) + .flatMap(Collection::stream)); + } + + private List buildArgs(final ConfiguredAirbyteCatalog catalog, final AirbyteStream stream, final String prefix) { + final String transformedTopic = NAMING_RESOLVER.getIdentifier(TOPIC_NAME); + final String transformedName = NAMING_RESOLVER.getIdentifier(stream.getName()); + final String transformedNamespace = NAMING_RESOLVER.getIdentifier(stream.getNamespace()); + + return ImmutableList.of( + Arguments.of(catalog, stream.getName(), stream.getNamespace(), TOPIC_NAME, prefix + "test_topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "test-topic", prefix + "test_topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}", prefix + transformedNamespace), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{stream}", prefix + transformedName), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}.{stream}." + TOPIC_NAME, + prefix + transformedNamespace + "_" + transformedName + "_" + transformedTopic), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}-{stream}-" + TOPIC_NAME, + prefix + transformedNamespace + "_" + transformedName + "_" + transformedTopic), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "topic with spaces", prefix + "topic_with_spaces")); + } + + } + + @BeforeEach + void setup() { + PULSAR = new PulsarContainer(DockerImageName.parse("apachepulsar/pulsar:2.8.1")); + PULSAR.start(); + } + + @AfterEach + void tearDown() { + PULSAR.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java index 44d232a526b0f..a6fdd5f877000 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.nio.file.Path; @@ -18,8 +19,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import 
org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; /** * Integration test testing {@link RedshiftCopyS3Destination}. The default Redshift integration test @@ -27,7 +26,6 @@ */ public class RedshiftCopyDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); // config from which to create / delete schemas. private JsonNode baseConfig; // config which refers to the schema that the test is being run in. @@ -111,7 +109,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-s3/Dockerfile b/airbyte-integrations/connectors/destination-s3/Dockerfile index c27f60d88243e..aab1915a9fbdc 100644 --- a/airbyte-integrations/connectors/destination-s3/Dockerfile +++ b/airbyte-integrations/connectors/destination-s3/Dockerfile @@ -7,5 +7,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.12 +LABEL io.airbyte.version=0.1.13 LABEL io.airbyte.name=airbyte/destination-s3 diff --git a/airbyte-integrations/connectors/destination-s3/build.gradle b/airbyte-integrations/connectors/destination-s3/build.gradle index 6900ba3e8112c..547e83765d541 100644 --- a/airbyte-integrations/connectors/destination-s3/build.gradle +++ b/airbyte-integrations/connectors/destination-s3/build.gradle @@ -26,7 +26,11 @@ dependencies { implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0' implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '3.3.0' implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0' - implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10' + implementation('tech.allegro.schema.json2avro:converter') { + version { + branch = 'master' + } + } testImplementation 'org.apache.commons:commons-lang3:3.11' diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java new file mode 100644 index 0000000000000..50b9012fbbd99 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.s3.avro; + +import java.util.Set; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; + +public class AvroConstants { + + // Field name with special character + public static final String DOC_KEY_VALUE_DELIMITER = ":"; + public static final String DOC_KEY_ORIGINAL_NAME = "_airbyte_original_name"; + + public static final String AVRO_EXTRA_PROPS_FIELD = "_airbyte_additional_properties"; + // This set must include _ab_additional_col in source_s3/source_files_abstract/stream.py + public static final Set JSON_EXTRA_PROPS_FIELDS = Set.of("_ab_additional_properties", AVRO_EXTRA_PROPS_FIELD); + public static final AvroNameTransformer NAME_TRANSFORMER = new AvroNameTransformer(); + public static final JsonAvroConverter JSON_CONVERTER = JsonAvroConverter.builder() + .setNameTransformer(NAME_TRANSFORMER::getIdentifier) + .setJsonAdditionalPropsFieldNames(JSON_EXTRA_PROPS_FIELDS) + .setAvroAdditionalPropsFieldName(AVRO_EXTRA_PROPS_FIELD) + .build(); + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3NameTransformer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java similarity index 83% rename from airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3NameTransformer.java rename to airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java index 936dc6b27c1ec..c1dc15a076d14 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3NameTransformer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java @@ -2,11 +2,11 @@ * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.integrations.destination.s3; +package io.airbyte.integrations.destination.s3.avro; import io.airbyte.integrations.destination.ExtendedNameTransformer; -public class S3NameTransformer extends ExtendedNameTransformer { +public class AvroNameTransformer extends ExtendedNameTransformer { @Override protected String applyDefaultCase(final String input) { diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java index 94611e32bcf4b..791df02105424 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java @@ -5,7 +5,6 @@ package io.airbyte.integrations.destination.s3.avro; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -23,22 +22,18 @@ public class AvroRecordFactory { private static final ObjectWriter WRITER = MAPPER.writer(); private final Schema schema; - private final JsonFieldNameUpdater nameUpdater; - private final JsonAvroConverter converter = new JsonAvroConverter(); + private final JsonAvroConverter converter; - public AvroRecordFactory(final Schema schema, final JsonFieldNameUpdater nameUpdater) { + public AvroRecordFactory(final Schema schema, final JsonAvroConverter converter) { this.schema = schema; - this.nameUpdater = nameUpdater; + this.converter = converter; } public GenericData.Record getAvroRecord(final UUID id, final AirbyteRecordMessage recordMessage) throws JsonProcessingException { - JsonNode inputData = recordMessage.getData(); - inputData = nameUpdater.getJsonWithStandardizedFieldNames(inputData); - final ObjectNode jsonRecord = MAPPER.createObjectNode(); jsonRecord.put(JavaBaseConstants.COLUMN_NAME_AB_ID, id.toString()); jsonRecord.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, recordMessage.getEmittedAt()); - jsonRecord.setAll((ObjectNode) inputData); + jsonRecord.setAll((ObjectNode) recordMessage.getData()); return converter.convertToGenericDataRecord(WRITER.writeValueAsBytes(jsonRecord), schema); } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java index e6386f2beb52f..1a4377b3bbc05 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java @@ -10,8 +10,9 @@ import java.util.Map; /** - * This helper class tracks whether a Json has special field name that needs to be replaced with a - * standardized one, and can perform the replacement when necessary. + * This helper class is for testing only. It tracks the original and standardized names, and revert + * them when necessary, so that the tests can correctly compare the generated json with the original + * input. 
*/ public class JsonFieldNameUpdater { @@ -22,23 +23,8 @@ public JsonFieldNameUpdater(final Map standardizedNames) { this.standardizedNames = ImmutableMap.copyOf(standardizedNames); } - public boolean hasNameUpdate() { - return standardizedNames.size() > 0; - } - - public JsonNode getJsonWithStandardizedFieldNames(final JsonNode input) { - if (!hasNameUpdate()) { - return input; - } - String jsonString = Jsons.serialize(input); - for (final Map.Entry entry : standardizedNames.entrySet()) { - jsonString = jsonString.replaceAll(quote(entry.getKey()), quote(entry.getValue())); - } - return Jsons.deserialize(jsonString); - } - public JsonNode getJsonWithOriginalFieldNames(final JsonNode input) { - if (!hasNameUpdate()) { + if (standardizedNames.size() == 0) { return input; } String jsonString = Jsons.serialize(input); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index dd271277b482e..7280b89cbc59a 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -9,7 +9,6 @@ import com.google.common.base.Preconditions; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.integrations.destination.s3.S3NameTransformer; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -21,28 +20,28 @@ import javax.annotation.Nullable; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; -import org.apache.avro.Schema.Type; import org.apache.avro.SchemaBuilder; import org.apache.avro.SchemaBuilder.RecordBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.AdditionalPropertyField; /** * The main function of this class is to convert a JsonSchema to Avro schema. It can also * standardize schema names, and keep track of a mapping from the original names to the standardized - * ones. + * ones, which is needed for unit tests. *

* For limitations of this converter, see the README of this connector: * https://docs.airbyte.io/integrations/destinations/s3#avro */ public class JsonToAvroSchemaConverter { - public static final Schema UUID_SCHEMA = LogicalTypes.uuid() - .addToSchema(Schema.create(Type.STRING)); + private static final Schema UUID_SCHEMA = LogicalTypes.uuid() + .addToSchema(Schema.create(Schema.Type.STRING)); + private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); private static final Logger LOGGER = LoggerFactory.getLogger(JsonToAvroSchemaConverter.class); private static final Schema TIMESTAMP_MILLIS_SCHEMA = LogicalTypes.timestampMillis() - .addToSchema(Schema.create(Type.LONG)); - private static final S3NameTransformer NAME_TRANSFORMER = new S3NameTransformer(); + .addToSchema(Schema.create(Schema.Type.LONG)); private final Map standardizedNames = new HashMap<>(); @@ -99,7 +98,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, final String name, @Nullable final String namespace, final boolean appendAirbyteFields) { - final String stdName = NAME_TRANSFORMER.getIdentifier(name); + final String stdName = AvroConstants.NAME_TRANSFORMER.getIdentifier(name); RecordBuilder builder = SchemaBuilder.record(stdName); if (!stdName.equals(name)) { standardizedNames.put(name, stdName); @@ -107,8 +106,8 @@ public Schema getAvroSchema(final JsonNode jsonSchema, stdName); builder = builder.doc( String.format("%s%s%s", - S3AvroConstants.DOC_KEY_ORIGINAL_NAME, - S3AvroConstants.DOC_KEY_VALUE_DELIMITER, + AvroConstants.DOC_KEY_ORIGINAL_NAME, + AvroConstants.DOC_KEY_VALUE_DELIMITER, name)); } if (namespace != null) { @@ -116,7 +115,11 @@ public Schema getAvroSchema(final JsonNode jsonSchema, } final JsonNode properties = jsonSchema.get("properties"); - final List fieldNames = new ArrayList<>(MoreIterators.toList(properties.fieldNames())); + // object field with no "properties" will be handled by the default additional properties + // field during object conversion; so it is fine if there is no "properties" + final List fieldNames = properties == null + ? 
Collections.emptyList() + : new ArrayList<>(MoreIterators.toList(properties.fieldNames())); SchemaBuilder.FieldAssembler assembler = builder.fields(); @@ -127,7 +130,13 @@ public Schema getAvroSchema(final JsonNode jsonSchema, } for (final String fieldName : fieldNames) { - final String stdFieldName = NAME_TRANSFORMER.getIdentifier(fieldName); + // ignore additional properties fields, which will be consolidated + // into one field at the end + if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(fieldName)) { + continue; + } + + final String stdFieldName = AvroConstants.NAME_TRANSFORMER.getIdentifier(fieldName); final JsonNode fieldDefinition = properties.get(fieldName); SchemaBuilder.FieldBuilder fieldBuilder = assembler.name(stdFieldName); if (!stdFieldName.equals(fieldName)) { @@ -135,14 +144,18 @@ public Schema getAvroSchema(final JsonNode jsonSchema, LOGGER.warn("Field name contains illegal character(s) and is standardized: {} -> {}", fieldName, stdFieldName); fieldBuilder = fieldBuilder.doc(String.format("%s%s%s", - S3AvroConstants.DOC_KEY_ORIGINAL_NAME, - S3AvroConstants.DOC_KEY_VALUE_DELIMITER, + AvroConstants.DOC_KEY_ORIGINAL_NAME, + AvroConstants.DOC_KEY_VALUE_DELIMITER, fieldName)); } assembler = fieldBuilder.type(getNullableFieldTypes(fieldName, fieldDefinition)) .withDefault(null); } + // support additional properties in one field + assembler = assembler.name(AvroConstants.AVRO_EXTRA_PROPS_FIELD) + .type(AdditionalPropertyField.FIELD_SCHEMA).withDefault(null); + return assembler.endRecord(); } @@ -150,6 +163,12 @@ Schema getSingleFieldType(final String fieldName, final JsonSchemaType fieldType Preconditions .checkState(fieldType != JsonSchemaType.NULL, "Null types should have been filtered out"); + // the additional properties fields are filtered out and never passed into this method; + // but this method is able to handle them for completeness + if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(fieldName)) { + return AdditionalPropertyField.FIELD_SCHEMA; + } + final Schema fieldSchema; switch (fieldType) { case STRING, NUMBER, INTEGER, BOOLEAN -> fieldSchema = Schema.create(fieldType.getAvroType()); @@ -166,7 +185,7 @@ Schema getSingleFieldType(final String fieldName, final JsonSchemaType fieldType fieldSchema = Schema.createArray(getNullableFieldTypes(String.format("%s.items", fieldName), items)); } else if (items.isArray()) { final List arrayElementTypes = getSchemasFromTypes(fieldName, (ArrayNode) items); - arrayElementTypes.add(0, Schema.create(Type.NULL)); + arrayElementTypes.add(0, NULL_SCHEMA); fieldSchema = Schema.createArray(Schema.createUnion(arrayElementTypes)); } else { throw new IllegalStateException( @@ -217,7 +236,9 @@ Schema getNullableFieldTypes(final String fieldName, final JsonNode fieldDefinit return Schema.create(Schema.Type.NULL); } else { // Mark every field as nullable to prevent missing value exceptions from Avro / Parquet. 
- nonNullFieldTypes.add(0, Schema.create(Schema.Type.NULL)); + if (!nonNullFieldTypes.contains(NULL_SCHEMA)) { + nonNullFieldTypes.add(0, NULL_SCHEMA); + } return Schema.createUnion(nonNullFieldTypes); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java deleted file mode 100644 index 72be89baa4d80..0000000000000 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.destination.s3.avro; - -public class S3AvroConstants { - - // Field name with special character - public static final String DOC_KEY_VALUE_DELIMITER = ":"; - public static final String DOC_KEY_ORIGINAL_NAME = "_airbyte_original_name"; - -} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java index e9e379d97e40a..7448a48b79084 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java @@ -24,6 +24,7 @@ import org.apache.avro.generic.GenericDatumWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3AvroWriter extends BaseS3Writer implements S3Writer { @@ -39,7 +40,7 @@ public S3AvroWriter(final S3DestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws IOException { super(config, s3Client, configuredStream); @@ -49,7 +50,7 @@ public S3AvroWriter(final S3DestinationConfig config, LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), objectKey); - this.avroRecordFactory = new AvroRecordFactory(schema, nameUpdater); + this.avroRecordFactory = new AvroRecordFactory(schema, converter); this.uploadManager = S3StreamTransferManagerHelper.getDefault( config.getBucketName(), objectKey, s3Client, config.getFormatConfig().getPartSize()); // We only need one output stream as we only have one input stream. This is reasonably performant. 
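The schema-converter changes above union every field type with `null` (prepending it only when it is not already present) and append a single `_airbyte_additional_properties` map field to each record. As a quick, standalone illustration of the resulting Avro types (a sketch using only the Avro 1.x API; the class and method names below are illustrative and not part of the connector), the following prints the same `["null", ...]` unions that appear in the updated `json_conversion_test_cases.json` resource:

```
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;

public class AvroUnionSketch {

  private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL);

  // Mirrors the getNullableFieldTypes rule: prepend NULL only when the union does not already contain it.
  static Schema toNullableUnion(final List<Schema> nonNullTypes) {
    if (nonNullTypes.isEmpty()) {
      return NULL_SCHEMA;
    }
    final List<Schema> union = new ArrayList<>(nonNullTypes);
    if (!union.contains(NULL_SCHEMA)) {
      union.add(0, NULL_SCHEMA);
    }
    return Schema.createUnion(union);
  }

  public static void main(final String[] args) {
    // ["null","string"] -- the shape used for ordinary optional fields.
    System.out.println(toNullableUnion(List.of(Schema.create(Schema.Type.STRING))));

    // ["null",{"type":"map","values":"string"}] -- the shape of the appended
    // _airbyte_additional_properties field in the test resources.
    System.out.println(toNullableUnion(List.of(Schema.createMap(Schema.create(Schema.Type.STRING)))));
  }
}
```

De-duplicating `null` matters because Avro unions may not contain the same type twice, so a JSON schema that already lists `"null"` among its types would otherwise yield an invalid union.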
diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java index 88f658758541f..65afc0805f14b 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java @@ -8,7 +8,6 @@ import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.writer.BaseS3Writer; import io.airbyte.integrations.destination.s3.writer.S3Writer; import io.airbyte.protocol.models.AirbyteRecordMessage; @@ -29,6 +28,7 @@ import org.apache.parquet.hadoop.util.HadoopOutputFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3ParquetWriter extends BaseS3Writer implements S3Writer { @@ -36,7 +36,7 @@ public class S3ParquetWriter extends BaseS3Writer implements S3Writer { private final ParquetWriter parquetWriter; private final AvroRecordFactory avroRecordFactory; - private final Schema parquetSchema; + private final Schema schema; private final String outputFilename; public S3ParquetWriter(final S3DestinationConfig config, @@ -44,7 +44,7 @@ public S3ParquetWriter(final S3DestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws URISyntaxException, IOException { super(config, s3Client, configuredStream); @@ -69,8 +69,8 @@ public S3ParquetWriter(final S3DestinationConfig config, .withDictionaryPageSize(formatConfig.getDictionaryPageSize()) .withDictionaryEncoding(formatConfig.isDictionaryEncoding()) .build(); - this.avroRecordFactory = new AvroRecordFactory(schema, nameUpdater); - this.parquetSchema = schema; + this.avroRecordFactory = new AvroRecordFactory(schema, converter); + this.schema = schema; } public static Configuration getHadoopConfig(final S3DestinationConfig config) { @@ -88,8 +88,8 @@ public static Configuration getHadoopConfig(final S3DestinationConfig config) { return hadoopConfig; } - public Schema getParquetSchema() { - return parquetSchema; + public Schema getSchema() { + return schema; } /** diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java index f91c6d244cf5b..39041093271c6 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java @@ -7,7 +7,7 @@ import com.amazonaws.services.s3.AmazonS3; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; -import 
io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonToAvroSchemaConverter; import io.airbyte.integrations.destination.s3.avro.S3AvroWriter; import io.airbyte.integrations.destination.s3.csv.S3CsvWriter; @@ -34,19 +34,17 @@ public S3Writer create(final S3DestinationConfig config, if (format == S3Format.AVRO || format == S3Format.PARQUET) { final AirbyteStream stream = configuredStream.getStream(); + LOGGER.info("Json schema for stream {}: {}", stream.getName(), stream.getJsonSchema()); + final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace(), true); - final JsonFieldNameUpdater nameUpdater = new JsonFieldNameUpdater(schemaConverter.getStandardizedNames()); LOGGER.info("Avro schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); - if (nameUpdater.hasNameUpdate()) { - LOGGER.info("The following field names will be standardized: {}", nameUpdater); - } if (format == S3Format.AVRO) { - return new S3AvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new S3AvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } else { - return new S3ParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new S3ParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java index 9352a73750fdc..28c21602a7cee 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectReader; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; import java.util.LinkedList; @@ -18,12 +19,9 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericDatumReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3AvroDestinationAcceptanceTest extends S3DestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected S3AvroDestinationAcceptanceTest() { super(S3Format.AVRO); } @@ -55,7 +53,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); while (dataFileReader.hasNext()) { final GenericData.Record record = dataFileReader.next(); - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode 
jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java index 8d555fd27738e..e382c3b634a60 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java @@ -4,8 +4,6 @@ package io.airbyte.integrations.destination.s3; -import static io.airbyte.integrations.destination.s3.S3DestinationConstants.NAME_TRANSFORMER; - import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; @@ -17,6 +15,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.nio.file.Path; @@ -88,7 +87,7 @@ protected List getAllSyncedObjects(final String streamName, fin .listObjects(config.getBucketName(), outputPrefix) .getObjectSummaries() .stream() - .filter(o -> o.getKey().contains(NAME_TRANSFORMER.convertStreamName(streamName) + "/")) + .filter(o -> o.getKey().contains(AvroConstants.NAME_TRANSFORMER.convertStreamName(streamName) + "/")) .sorted(Comparator.comparingLong(o -> o.getLastModified().getTime())) .collect(Collectors.toList()); LOGGER.info( diff --git a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java index f20b01623ecdf..bec71d1883ab3 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectReader; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.parquet.S3ParquetWriter; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; @@ -21,12 +22,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3ParquetDestinationAcceptanceTest extends S3DestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected 
S3ParquetDestinationAcceptanceTest() { super(S3Format.PARQUET); } @@ -62,7 +60,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); GenericData.Record record; while ((record = parquetReader.read()) != null) { - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java index d92a90e768de3..772a6110ef20e 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java @@ -29,7 +29,6 @@ public void testFieldNameUpdate() throws IOException { final JsonNode original = testCase.get("original"); final JsonNode updated = testCase.get("updated"); - assertEquals(updated, nameUpdater.getJsonWithStandardizedFieldNames(original)); assertEquals(original, nameUpdater.getJsonWithOriginalFieldNames(updated)); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java similarity index 60% rename from airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverterTest.java rename to airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java index 8f5420d11929a..2a90a03c25827 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java @@ -8,12 +8,16 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectWriter; import com.google.common.collect.Lists; +import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.util.MoreIterators; import java.util.Collections; import java.util.stream.Stream; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.params.ParameterizedTest; @@ -21,7 +25,10 @@ import org.junit.jupiter.params.provider.ArgumentsProvider; import org.junit.jupiter.params.provider.ArgumentsSource; -class JsonToAvroSchemaConverterTest { +class JsonToAvroConverterTest { + + private static final ObjectWriter WRITER = MoreMappers.initMapper().writer(); + private static final JsonToAvroSchemaConverter SCHEMA_CONVERTER = new JsonToAvroSchemaConverter(); 
@Test public void testGetSingleTypes() { @@ -55,7 +62,7 @@ public static class GetFieldTypeTestCaseProvider implements ArgumentsProvider { @Override public Stream provideArguments(final ExtensionContext context) throws Exception { - final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/get_field_type.json")); + final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/type_conversion_test_cases.json")); return MoreIterators.toList(testCases.elements()).stream().map(testCase -> Arguments.of( testCase.get("fieldName").asText(), testCase.get("jsonFieldSchema"), @@ -66,11 +73,10 @@ public Stream provideArguments(final ExtensionContext conte @ParameterizedTest @ArgumentsSource(GetFieldTypeTestCaseProvider.class) - public void testGetFieldType(final String fieldName, final JsonNode jsonFieldSchema, final JsonNode avroFieldType) { - final JsonToAvroSchemaConverter converter = new JsonToAvroSchemaConverter(); + public void testFieldTypeConversion(final String fieldName, final JsonNode jsonFieldSchema, final JsonNode avroFieldType) { assertEquals( avroFieldType, - Jsons.deserialize(converter.getNullableFieldTypes(fieldName, jsonFieldSchema).toString()), + Jsons.deserialize(SCHEMA_CONVERTER.getNullableFieldTypes(fieldName, jsonFieldSchema).toString()), String.format("Test for %s failed", fieldName)); } @@ -78,30 +84,46 @@ public static class GetAvroSchemaTestCaseProvider implements ArgumentsProvider { @Override public Stream provideArguments(final ExtensionContext context) throws Exception { - final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/get_avro_schema.json")); + final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/json_conversion_test_cases.json")); return MoreIterators.toList(testCases.elements()).stream().map(testCase -> Arguments.of( testCase.get("schemaName").asText(), testCase.get("namespace").asText(), testCase.get("appendAirbyteFields").asBoolean(), testCase.get("jsonSchema"), - testCase.get("avroSchema"))); + testCase.get("jsonObject"), + testCase.get("avroSchema"), + testCase.get("avroObject"))); } } + /** + * This test verifies both the schema and object conversion. 
+ */ @ParameterizedTest @ArgumentsSource(GetAvroSchemaTestCaseProvider.class) - public void testGetAvroSchema( - final String schemaName, - final String namespace, - final boolean appendAirbyteFields, - final JsonNode jsonSchema, - final JsonNode avroSchema) { - final JsonToAvroSchemaConverter converter = new JsonToAvroSchemaConverter(); + public void testJsonAvroConversion(final String schemaName, + final String namespace, + final boolean appendAirbyteFields, + final JsonNode jsonSchema, + final JsonNode jsonObject, + final JsonNode avroSchema, + final JsonNode avroObject) + throws Exception { + final Schema actualAvroSchema = SCHEMA_CONVERTER.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields); assertEquals( avroSchema, - Jsons.deserialize(converter.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields).toString()), - String.format("Test for %s failed", schemaName)); + Jsons.deserialize(actualAvroSchema.toString()), + String.format("Schema conversion for %s failed", schemaName)); + + final Schema.Parser schemaParser = new Schema.Parser(); + final GenericData.Record actualAvroObject = AvroConstants.JSON_CONVERTER.convertToGenericDataRecord( + WRITER.writeValueAsBytes(jsonObject), + schemaParser.parse(Jsons.serialize(avroSchema))); + assertEquals( + avroObject, + Jsons.deserialize(actualAvroObject.toString()), + String.format("Object conversion for %s failed", schemaName)); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_avro_schema.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json similarity index 53% rename from airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_avro_schema.json rename to airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json index 77a415baa2777..c611474f080f5 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_avro_schema.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json @@ -11,6 +11,9 @@ } } }, + "jsonObject": { + "node_id": "abc123" + }, "avroSchema": { "type": "record", "name": "simple_schema", @@ -20,8 +23,17 @@ "name": "node_id", "type": ["null", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": null } }, { @@ -47,6 +59,13 @@ } } }, + "jsonObject": { + "node_id": "abc123", + "user": { + "first_name": "charles", + "last_name": "darwin" + } + }, "avroSchema": { "type": "record", "name": "nested_record", @@ -75,13 +94,32 @@ "name": "last_name", "type": ["null", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] } ], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "node_id": "abc123", + "user": { + "first_name": "charles", + "last_name": "darwin", + "_airbyte_additional_properties": null + }, + "_airbyte_additional_properties": null } }, { @@ -96,6 +134,11 @@ } } }, + "jsonObject": { + 
"_airbyte_ab_id": "752fcd83-7e46-41da-b7ff-f05cb070c893", + "_airbyte_emitted_at": 1634982000, + "node_id": "abc123" + }, "avroSchema": { "type": "record", "name": "record_with_airbyte_fields", @@ -119,8 +162,19 @@ "name": "node_id", "type": ["null", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "_airbyte_ab_id": "752fcd83-7e46-41da-b7ff-f05cb070c893", + "_airbyte_emitted_at": 1634982000, + "node_id": "abc123", + "_airbyte_additional_properties": null } }, { @@ -135,6 +189,9 @@ } } }, + "jsonObject": { + "node:id": "abc123" + }, "avroSchema": { "type": "record", "name": "name_with_special_characters", @@ -146,8 +203,17 @@ "doc": "_airbyte_original_name:node:id", "type": ["null", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": null } }, { @@ -162,6 +228,9 @@ } } }, + "jsonObject": { + "identifier": 65536.0 + }, "avroSchema": { "type": "record", "name": "record_with_union_type", @@ -171,8 +240,17 @@ "name": "identifier", "type": ["null", "double", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "identifier": 65536.0, + "_airbyte_additional_properties": null } }, { @@ -190,6 +268,9 @@ } } }, + "jsonObject": { + "identifier": ["151", "152"] + }, "avroSchema": { "type": "record", "name": "array_with_same_type", @@ -205,8 +286,17 @@ } ], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "identifier": ["151", "152"], + "_airbyte_additional_properties": null } }, { @@ -235,6 +325,9 @@ } } }, + "jsonObject": { + "identifiers": ["151", 152, true, false] + }, "avroSchema": { "type": "record", "name": "array_with_union_type", @@ -250,8 +343,17 @@ } ], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "identifiers": ["151", 152, true, false], + "_airbyte_additional_properties": null } }, { @@ -276,6 +378,9 @@ } } }, + "jsonObject": { + "created_at": 1634982000 + }, "avroSchema": { "type": "record", "name": "field_with_combined_restriction", @@ -285,8 +390,17 @@ "name": "created_at", "type": ["null", "string", "int"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "created_at": 1634982000, + "_airbyte_additional_properties": null } }, { @@ -316,6 +430,11 @@ } } }, + "jsonObject": { + "user": { + "created_at": "1634982000" + } + }, "avroSchema": { "type": "record", "name": "record_with_combined_restriction_field", @@ -334,13 +453,30 @@ "name": "created_at", "type": ["null", "string", "int"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] } ], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "user": { + "created_at": "1634982000", + 
"_airbyte_additional_properties": null + }, + "_airbyte_additional_properties": null } }, { @@ -362,6 +498,9 @@ } } }, + "jsonObject": { + "identifiers": [151, 152, "153", true, false] + }, "avroSchema": { "type": "record", "name": "array_with_combined_restriction_field", @@ -377,8 +516,17 @@ } ], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] + }, + "avroObject": { + "identifiers": [151, 152, "153", true, false], + "_airbyte_additional_properties": null } }, { @@ -388,23 +536,165 @@ "jsonSchema": { "type": "object", "properties": { - "5filed_name": { + "5field_name": { "type": ["null", "string"] } } }, + "jsonObject": { + "_5field_name": "theory of relativity" + }, "avroSchema": { "type": "record", "name": "field_with_bad_first_char", "namespace": "namespace11", "fields": [ { - "name": "_5filed_name", + "name": "_5field_name", "type": ["null", "string"], - "doc": "_airbyte_original_name:5filed_name", + "doc": "_airbyte_original_name:5field_name", + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], "default": null } ] + }, + "avroObject": { + "_5field_name": "theory of relativity", + "_airbyte_additional_properties": null + } + }, + { + "schemaName": "record_with_airbyte_additional_properties", + "namespace": "namespace12", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "node_id": { + "type": ["null", "string"] + }, + "_airbyte_additional_properties": { + "type": "object" + } + } + }, + "jsonObject": { + "node_id": "abc123", + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": 1439, + "active": true + } + }, + "avroSchema": { + "type": "record", + "name": "record_with_airbyte_additional_properties", + "namespace": "namespace12", + "fields": [ + { + "name": "node_id", + "type": ["null", "string"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": "1439", + "active": "true" + } + } + }, + { + "schemaName": "record_with_ab_additional_properties", + "namespace": "namespace13", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "node_id": { + "type": ["null", "string"] + }, + "_ab_additional_properties": { + "type": "object" + } + } + }, + "jsonObject": { + "node_id": "abc123", + "_ab_additional_properties": { + "username": "343-guilty-spark", + "password": 1439, + "active": true + } + }, + "avroSchema": { + "type": "record", + "name": "record_with_ab_additional_properties", + "namespace": "namespace13", + "fields": [ + { + "name": "node_id", + "type": ["null", "string"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": "1439", + "active": "true" + } + } + }, + { + "schemaName": "record_without_properties", + "namespace": "namespace14", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object" + }, + "jsonObject": { + "username": "343-guilty-spark", + "password": 1439, + "active": true + }, + "avroSchema": { + "type": 
"record", + "name": "record_without_properties", + "namespace": "namespace14", + "fields": [ + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] + }, + "avroObject": { + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": "1439", + "active": "true" + } } } ] diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_field_type.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json similarity index 71% rename from airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_field_type.json rename to airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json index 6dd9a503e984b..3171888d27340 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_field_type.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json @@ -99,11 +99,50 @@ "name": "node_id", "type": ["null", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] + } + ] + }, + { + "fieldName": "object_field_without_properties", + "jsonFieldSchema": { + "type": "object" + }, + "avroFieldType": [ + "null", + { + "type": "record", + "name": "object_field_without_properties", + "fields": [ + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] } ] }, + { + "fieldName": "_airbyte_additional_properties", + "jsonFieldSchema": { + "type": "object" + }, + "avroFieldType": ["null", { "type": "map", "values": "string" }] + }, + { + "fieldName": "_ab_additional_properties", + "jsonFieldSchema": { + "type": "object" + }, + "avroFieldType": ["null", { "type": "map", "values": "string" }] + }, { "fieldName": "any_of_field", "jsonFieldSchema": { diff --git a/airbyte-integrations/connectors/source-confluence/.dockerignore b/airbyte-integrations/connectors/source-confluence/.dockerignore new file mode 100644 index 0000000000000..b98d666d0fe7a --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_confluence +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-confluence/Dockerfile b/airbyte-integrations/connectors/source-confluence/Dockerfile new file mode 100644 index 0000000000000..c5a090512f941 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . 
+ +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_confluence ./source_confluence + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-confluence diff --git a/airbyte-integrations/connectors/source-confluence/README.md b/airbyte-integrations/connectors/source-confluence/README.md new file mode 100644 index 0000000000000..2710ecd57c58d --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/README.md @@ -0,0 +1,132 @@ +# Confluence Source + +This is the repository for the Confluence source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/confluence). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/confluence) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_confluence/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source confluence test creds` +and place them into `secrets/config.json`. 
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-confluence:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-confluence:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-confluence:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-confluence:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-confluence:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. 
+* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml b/airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml new file mode 100644 index 0000000000000..48739b8cf72ee --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-confluence:dev +tests: + spec: + - spec_path: "source_confluence/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-confluence/build.gradle b/airbyte-integrations/connectors/source-confluence/build.gradle new file mode 100644 index 0000000000000..7638a29f031b6 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_confluence' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py b/airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py new file mode 100644 index 0000000000000..58c194c5d1376 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..3b9510160172f --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json @@ -0,0 +1,44 @@ +{ + "streams": [ + { + "stream": { + "name": "pages", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "blog_posts", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "space", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "group", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..7ca7ac5f19ac5 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json @@ -0,0 +1,5 @@ +{ + "api_token": "", + "domain_name": "example.atlassian.net", + "email": "test@example.com" +} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json new file mode 100644 index 0000000000000..bc9993815948c --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json @@ -0,0 +1,5 @@ +{ + "api_token": "abcd", + "domain_name": "example.atlassian.net", + "email": "test@example.com" +} diff --git a/airbyte-integrations/connectors/source-confluence/main.py b/airbyte-integrations/connectors/source-confluence/main.py new file mode 100644 index 0000000000000..11b575ce0c4a8 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_confluence import SourceConfluence + +if __name__ == "__main__": + source = SourceConfluence() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-confluence/requirements.txt b/airbyte-integrations/connectors/source-confluence/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . 
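For reference, `main.py` above simply forwards `sys.argv[1:]` to the CDK entrypoint, and `configured_catalog.json` refers to streams by their snake_cased class names. A minimal sketch of how the two connect, assuming a valid `secrets/config.json` (the stream classes themselves are defined in `source_confluence/source.py` further down in this diff):
```
import json

from source_confluence import SourceConfluence

with open("secrets/config.json") as f:
    config = json.load(f)

# streams() instantiates Pages, BlogPosts, Space, Group and Audit;
# the configured catalog above selects four of them by these names.
for stream in SourceConfluence().streams(config):
    print(stream.name)  # pages, blog_posts, space, group, audit
```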
diff --git a/airbyte-integrations/connectors/source-confluence/setup.py b/airbyte-integrations/connectors/source-confluence/setup.py new file mode 100644 index 0000000000000..c5e5646e909fd --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/setup.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", + "responses~=0.13.3", +] + +setup( + name="source_confluence", + description="Source implementation for Confluence.", + author="Tuan Nguyen", + author_email="anhtuan.nguyen@me.com", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py b/airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py new file mode 100644 index 0000000000000..b73058e73def3 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceConfluence + +__all__ = ["SourceConfluence"] diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json new file mode 100644 index 0000000000000..ee1d7cca3f43a --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "author": { "type": ["null", "object"] }, + "remoteAddress": { "type": ["null", "string"] }, + "creationDate": { "type": ["null", "string"] }, + "summary": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "category": { "type": ["null", "string"] }, + "sysAdmin": { "type": ["null", "boolean"] }, + "superAdmin": { "type": ["null", "boolean"] }, + "affectedObject": { "type": ["null", "object"] }, + "changedValues": { "type": ["null", "array"] }, + "associatedObjects": { "type": ["null", "array"] } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json new file mode 100644 index 0000000000000..023d04af6716e --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json @@ -0,0 +1,199 @@ +{ + "definitions": { + "user": { + "type": "object", + "properties": { + "type": { + "type": ["string", "null"] + }, + "accountId": { + "type": ["string", "null"] + }, + "email": { + "type": ["string", "null"] + }, + "publicName": { + "type": ["string", "null"] + } + } + }, + "content": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + "contentRestriction": { + "type": "object", + "properties": { + "operations": { + "type": ["string", "null"] + }, + "restrictions": { + "user": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + } 
+ } + } + } + } + }, + "usersUserKeys": { + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + }, + "userKeys": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "version": { + "type": "object", + "properties": { + "by": { + "$ref": "#/definitions/user" + }, + "when": { + "type": ["string", "null"], + "format": "date-time" + }, + "friendlyWhen": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "number": { + "type": ["integer", "null"] + }, + "minorEdit": { + "type": ["boolean", "null"] + }, + "collaborators": { + "$ref": "#/definitions/usersUserKeys" + } + } + } + }, + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + }, + "history": { + "type": "object", + "properties": { + "latest": { + "type": "boolean" + }, + "createdBy": { + "$ref": "#/definitions/user" + }, + "createdDate": { + "type": "string", + "format": "date-time" + }, + "contributors": { + "type": "object", + "properties": { + "publishers": { + "$ref": "#/definitions/usersUserKeys" + } + } + }, + "previousVersion": { + "$ref": "#/definitions/version" + } + } + }, + "version": { + "$ref": "#/definitions/version" + }, + "descendants": { + "type": "object", + "properties": { + "comment": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/content" + } + } + } + } + } + }, + "restrictions": { + "type": "object", + "properties": { + "read": { + "$ref": "#/definitions/contentRestriction" + } + } + }, + "_expandable": { + "type": "object", + "properties": { + "container": { + "type": "string" + }, + "space": { + "type": "string" + } + } + }, + "_links": { + "type": "object", + "properties": { + "self": { + "type": "string" + }, + "tinyui": { + "type": "string" + }, + "editui": { + "type": "string" + }, + "webui": { + "type": "string" + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json new file mode 100644 index 0000000000000..e430f4dfaa039 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json @@ -0,0 +1,10 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "string"] }, + "name": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] }, + "_links": { "type": ["null", "object"] } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json new file mode 100644 index 0000000000000..023d04af6716e --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json @@ -0,0 +1,199 @@ +{ + "definitions": { + "user": { + "type": "object", + "properties": { + "type": { + "type": ["string", "null"] + }, + "accountId": { + "type": ["string", "null"] + }, + "email": { + "type": ["string", "null"] + }, + "publicName": { + "type": ["string", "null"] + } + } + }, + "content": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + 
"contentRestriction": { + "type": "object", + "properties": { + "operations": { + "type": ["string", "null"] + }, + "restrictions": { + "user": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + } + } + } + } + } + }, + "usersUserKeys": { + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + }, + "userKeys": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "version": { + "type": "object", + "properties": { + "by": { + "$ref": "#/definitions/user" + }, + "when": { + "type": ["string", "null"], + "format": "date-time" + }, + "friendlyWhen": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "number": { + "type": ["integer", "null"] + }, + "minorEdit": { + "type": ["boolean", "null"] + }, + "collaborators": { + "$ref": "#/definitions/usersUserKeys" + } + } + } + }, + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + }, + "history": { + "type": "object", + "properties": { + "latest": { + "type": "boolean" + }, + "createdBy": { + "$ref": "#/definitions/user" + }, + "createdDate": { + "type": "string", + "format": "date-time" + }, + "contributors": { + "type": "object", + "properties": { + "publishers": { + "$ref": "#/definitions/usersUserKeys" + } + } + }, + "previousVersion": { + "$ref": "#/definitions/version" + } + } + }, + "version": { + "$ref": "#/definitions/version" + }, + "descendants": { + "type": "object", + "properties": { + "comment": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/content" + } + } + } + } + } + }, + "restrictions": { + "type": "object", + "properties": { + "read": { + "$ref": "#/definitions/contentRestriction" + } + } + }, + "_expandable": { + "type": "object", + "properties": { + "container": { + "type": "string" + }, + "space": { + "type": "string" + } + } + }, + "_links": { + "type": "object", + "properties": { + "self": { + "type": "string" + }, + "tinyui": { + "type": "string" + }, + "editui": { + "type": "string" + }, + "webui": { + "type": "string" + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json new file mode 100644 index 0000000000000..a021d235c3ab5 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "integer"] }, + "key": { "type": ["null", "string"] }, + "name": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] }, + "status": { "type": ["null", "string"] }, + "permissions": { "type": ["null", "array"] }, + "icon": { "type": ["null", "object"] }, + "description": { "type": ["null", "object"] }, + "_expandable": { "type": ["null", "object"] }, + "_links": { "type": ["null", "object"] } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/source.py b/airbyte-integrations/connectors/source-confluence/source_confluence/source.py new file mode 100644 index 0000000000000..cb2510c20f15b --- /dev/null +++ 
b/airbyte-integrations/connectors/source-confluence/source_confluence/source.py @@ -0,0 +1,146 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from base64 import b64encode +from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer + + +# Basic full refresh stream +class ConfluenceStream(HttpStream, ABC): + url_base = "https://{}/wiki/rest/api/" + primary_key = "id" + limit = 50 + start = 0 + expand = [] + transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) + + def __init__(self, config: Dict): + super().__init__(authenticator=config["authenticator"]) + self.config = config + self.url_base = self.url_base.format(config["domain_name"]) + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json() + links = json_response.get("_links") + next_link = links.get("next") + if next_link: + self.start += self.limit + return {"start": self.start} + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = {"limit": self.limit, "expand": ",".join(self.expand)} + if next_page_token: + params.update({"start": next_page_token["start"]}) + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + records = json_response.get("results", []) + yield from records + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return self.api_name + + +class BaseContentStream(ConfluenceStream, ABC): + api_name = "content" + expand = [ + "history", + "history.lastUpdated", + "history.previousVersion", + "history.contributors", + "restrictions.read.restrictions.user", + "version", + "descendants.comment", + ] + limit = 25 + content_type = None + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params.update({"type": self.content_type}) + return params + + +class Pages(BaseContentStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-content/#api-wiki-rest-api-content-get + """ + + content_type = "page" + + +class BlogPosts(BaseContentStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-content/#api-wiki-rest-api-content-get + """ + + content_type = "blogpost" + + +class Space(ConfluenceStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-space/#api-wiki-rest-api-space-get + """ + + api_name = "space" + expand = ["permissions", "icon", "description.plain", "description.view"] + + +class Group(ConfluenceStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-group/#api-wiki-rest-api-group-get 
+ """ + + api_name = "group" + + +class Audit(ConfluenceStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-audit/#api-wiki-rest-api-audit-get + """ + + primary_key = "author" + api_name = "audit" + limit = 1000 + + +# Source +class HttpBasicAuthenticator(TokenAuthenticator): + def __init__(self, email: str, token: str, auth_method: str = "Basic", **kwargs): + auth_string = f"{email}:{token}".encode("utf8") + b64_encoded = b64encode(auth_string).decode("utf8") + super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs) + + +class SourceConfluence(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + auth = HttpBasicAuthenticator(config["email"], config["api_token"], auth_method="Basic").get_auth_header() + url = f"https://{config['domain_name']}/wiki/rest/api/space" + try: + response = requests.get(url, headers=auth) + response.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = HttpBasicAuthenticator(config["email"], config["api_token"], auth_method="Basic") + config["authenticator"] = auth + return [Pages(config), BlogPosts(config), Space(config), Group(config), Audit(config)] diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/spec.json b/airbyte-integrations/connectors/source-confluence/source_confluence/spec.json new file mode 100644 index 0000000000000..136bd0bead4db --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/spec.json @@ -0,0 +1,27 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Confluence Spec", + "type": "object", + "required": ["api_token", "domain_name", "email"], + "additionalProperties": false, + "properties": { + "api_token": { + "type": "string", + "description": "Please follow the Jira confluence for generating an API token: https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/", + "airbyte_secret": true + }, + "domain_name": { + "type": "string", + "description": "Your Confluence domain name", + "examples": ["example.atlassian.net"] + }, + "email": { + "type": "string", + "description": "Your Confluence login email", + "examples": ["abc@example.com"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py b/airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py b/airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py new file mode 100644 index 0000000000000..29078266ccf9a --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py @@ -0,0 +1,10 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import pytest + + +@pytest.fixture(scope="session", name="config") +def config_fixture(): + return {"api_token": "test_api_key", "domain_name": "example.atlassian.net", "email": "test@example.com"} diff --git a/airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py b/airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py new file mode 100644 index 0000000000000..e03a348803683 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py @@ -0,0 +1,32 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +import responses +from source_confluence.source import SourceConfluence + + +def setup_responses(): + responses.add( + responses.GET, + "https://example.atlassian.net/wiki/rest/api/space", + json={"access_token": "test_api_key", "expires_in": 3600}, + ) + + +@responses.activate +def test_check_connection(config): + setup_responses() + source = SourceConfluence() + logger_mock = MagicMock() + assert source.check_connection(logger_mock, config) == (True, None) + + +def test_streams_count(mocker): + source = SourceConfluence() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 5 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile b/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile index 16dd6edf9e90d..2a2905c3f229e 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile +++ b/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.21 +LABEL io.airbyte.version=0.2.22 LABEL io.airbyte.name=airbyte/source-facebook-marketing diff --git a/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml b/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml index 4ec24438c10d0..d426101a5ef5f 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml @@ -29,4 +29,5 @@ tests: # Because one read response contains this metric, and other doesn't. # Therefore, it's needed to ignore fields like this in API responses. 
ignored_fields: - "ads_insights_age_and_gender": ["cost_per_estimated_ad_recallers"] + "ads_insights_age_and_gender": ["cost_per_estimated_ad_recallers"] + "ad_creatives": ["thumbnail_url"] diff --git a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py index cb45532f32deb..315e19628ea48 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py +++ b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py @@ -290,6 +290,9 @@ class AdsInsights(FBMarketingIncrementalStream): action_attribution_windows = ALL_ACTION_ATTRIBUTION_WINDOWS time_increment = 1 + running_jobs = deque() + times_job_restarted = {} + breakdowns = [] def __init__( @@ -327,7 +330,7 @@ def read_records( stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: """Waits for current job to finish (slice) and yield its result""" - result = self.wait_for_job(stream_slice["job"]) + result = self.wait_for_job(stream_slice["job"], stream_state=stream_state) # because we query `lookback_window` days before actual cursor we might get records older then cursor for obj in result.get_result(): @@ -341,20 +344,19 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite 3. we shouldn't proceed to consumption of the next job before previous succeed """ stream_state = stream_state or {} - running_jobs = deque() date_ranges = list(self._date_ranges(stream_state=stream_state)) for params in date_ranges: params = deep_merge(params, self.request_params(stream_state=stream_state)) job = self._create_insights_job(params) - running_jobs.append(job) - if len(running_jobs) >= self.MAX_ASYNC_JOBS: - yield {"job": running_jobs.popleft()} + self.running_jobs.append(job) + if len(self.running_jobs) >= self.MAX_ASYNC_JOBS: + yield {"job": self.running_jobs.popleft()} - while running_jobs: - yield {"job": running_jobs.popleft()} + while self.running_jobs: + yield {"job": self.running_jobs.popleft()} @backoff_policy - def wait_for_job(self, job) -> AdReportRun: + def wait_for_job(self, job, stream_state: Mapping[str, Any] = None) -> AdReportRun: factor = 2 start_time = pendulum.now() sleep_seconds = factor @@ -367,10 +369,20 @@ def wait_for_job(self, job) -> AdReportRun: if job["async_status"] == "Job Completed": return job - elif job["async_status"] == "Job Failed": - raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.") - elif job["async_status"] == "Job Skipped": - raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.") + elif job["async_status"] in ["Job Failed", "Job Skipped"]: + time_range = (job["date_start"], job["date_stop"]) + if self.times_job_restarted.get(time_range, 0) < 6: + params = deep_merge( + {"time_range": {"since": job["date_start"], "until": job["date_stop"]}}, + self.request_params(stream_state=stream_state), + ) + restart_job = self._create_insights_job(params) + self.running_jobs.append(restart_job) + self.times_job_restarted[time_range] += 1 + elif job["async_status"] == "Job Failed": + raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.") + elif job["async_status"] == "Job Skipped": + raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.") if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0: raise 
JobTimeoutException( diff --git a/airbyte-integrations/connectors/source-facebook-pages/Dockerfile b/airbyte-integrations/connectors/source-facebook-pages/Dockerfile index ae7872f9c6ea5..58db26cc4f3be 100644 --- a/airbyte-integrations/connectors/source-facebook-pages/Dockerfile +++ b/airbyte-integrations/connectors/source-facebook-pages/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-facebook-pages diff --git a/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml b/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml index 88c7f8e4f8fac..f78b8aefb46a3 100644 --- a/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml @@ -18,3 +18,10 @@ tests: full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + ignored_fields: + # Since we generating new Page access token for each connector run this + # field would containt different values. + page: + - "posts/paging/next" + - "published_posts/paging/next" + - "feed/paging/next" diff --git a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py index 0a0c670d7d6e3..de2ddc87497f1 100755 --- a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py +++ b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py @@ -5,6 +5,7 @@ from typing import Any, List, Mapping, Tuple +import requests from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import SyncMode from airbyte_cdk.sources import AbstractSource @@ -19,17 +20,32 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> error_msg = None try: - _ = list(Page(access_token=config["access_token"], page_id=config["page_id"]).read_records(sync_mode=SyncMode.full_refresh)) + access_token, page_id = config["access_token"], config["page_id"] + access_token = self.generate_page_access_token(page_id, access_token) + _ = list(Page(access_token=access_token, page_id=page_id).read_records(sync_mode=SyncMode.full_refresh)) ok = True except Exception as e: error_msg = repr(e) return ok, error_msg + @staticmethod + def generate_page_access_token(page_id: str, access_token: str) -> str: + # We are expecting to receive User access token from config. To access + # Pages API we need to generate Page access token. Page access tokens + # can be generated from another Page access token (with the same page ID) + # so if user manually set Page access token instead of User access + # token it would be no problem unless it has wrong page ID. 
+ # https://developers.facebook.com/docs/pages/access-tokens#get-a-page-access-token + r = requests.get(f"https://graph.facebook.com/{page_id}", params={"fields": "access_token", "access_token": access_token}) + return r.json()["access_token"] + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + access_token, page_id = config["access_token"], config["page_id"] + access_token = self.generate_page_access_token(page_id, access_token) stream_kwargs = { - "access_token": config["access_token"], - "page_id": config["page_id"], + "access_token": access_token, + "page_id": page_id, } streams = [ diff --git a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json index a8745f64b8308..0320a9f340f0f 100755 --- a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json +++ b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json @@ -5,15 +5,17 @@ "title": "Facebook Pages Spec", "type": "object", "required": ["access_token", "page_id"], - "additionalProperties": false, + "additionalProperties": true, "properties": { "access_token": { "type": "string", + "title": "Page Access Token", "description": "Facebook Page Access Token", "airbyte_secret": true }, "page_id": { "type": "string", + "title": "Page ID", "description": "Page ID" } } diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index a0187b0ffcc6d..510bb42dc9010 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -1,14 +1,13 @@ -FROM airbyte/source-file:0.2.6 +FROM airbyte/source-file:0.2.7 WORKDIR /airbyte/integration_code - -ENV CODE_PATH="source_file_secure" -ENV AIRBYTE_IMPL_MODULE="source_file_secure" -ENV AIRBYTE_IMPL_PATH="SourceFileSecure" - -COPY $CODE_PATH ./$CODE_PATH -RUN sed -i 's/source_file/source_file_secure/g' setup.py +COPY source_file_secure ./source_file_secure +COPY main.py ./ +COPY setup.py ./ RUN pip install . +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-file-secure -LABEL io.airbyte.version=0.1.0 diff --git a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml index ddddb348377ef..e8a25bfedf974 100644 --- a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml @@ -1,7 +1,7 @@ # See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) # for more information about how to configure these tests -# Here we tries to test a basic tests only. +# Here we tries to test a basic tests only. 
# The main part of tests should be executed for the source-file connector connector_image: airbyte/source-file-secure:dev tests: @@ -11,19 +11,17 @@ tests: - config_path: "integration_tests/invalid_config.json" status: "failed" # for https - - config_path: "integration_tests/https_config.json" - status: "succeed" + - config_path: "integration_tests/config.json" + status: "succeed" # for local should be failed - config_path: "integration_tests/local_config.json" - status: "exception" + status: "exception" discovery: # for https - - config_path: "integration_tests/https_config.json" + - config_path: "integration_tests/config.json" basic_read: # for https - - config_path: "integration_tests/https_config.json" - configured_catalog_path: "integration_tests/configured_https_catalog.json" - - + - config_path: "integration_tests/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-file-secure/build.gradle b/airbyte-integrations/connectors/source-file-secure/build.gradle index e4fbfaf782f21..7417c474d873f 100644 --- a/airbyte-integrations/connectors/source-file-secure/build.gradle +++ b/airbyte-integrations/connectors/source-file-secure/build.gradle @@ -8,8 +8,3 @@ plugins { airbytePython { moduleDirectory 'source_file_secure' } - -dependencies { - implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) -} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/config.json similarity index 100% rename from airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json rename to airbyte-integrations/connectors/source-file-secure/integration_tests/config.json diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_catalog.json similarity index 100% rename from airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json rename to airbyte-integrations/connectors/source-file-secure/integration_tests/configured_catalog.json diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json index c2d398ed6c58a..fd1448b39352f 100644 --- a/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json @@ -4,7 +4,7 @@ "reader_options": "{\"bla\": \",\", \"nrows\": 20}", "url": "https://fake-fake.com", "provider": { - "storage": "fake", + "storage": "HTTPS", "reader_impl": "fake" } } diff --git a/airbyte-integrations/connectors/source-file-secure/main_dev.py b/airbyte-integrations/connectors/source-file-secure/main.py similarity index 82% rename from airbyte-integrations/connectors/source-file-secure/main_dev.py rename to airbyte-integrations/connectors/source-file-secure/main.py index 4d942018cb520..2ba9b1095e950 100644 --- a/airbyte-integrations/connectors/source-file-secure/main_dev.py +++ b/airbyte-integrations/connectors/source-file-secure/main.py @@ -5,7 +5,7 @@ import sys -from base_python.entrypoint import launch +from 
airbyte_cdk.entrypoint import launch from source_file_secure import SourceFileSecure if __name__ == "__main__": diff --git a/airbyte-integrations/connectors/source-file-secure/requirements.txt b/airbyte-integrations/connectors/source-file-secure/requirements.txt index 16b5e8bf302a0..982c4b5a4923c 100644 --- a/airbyte-integrations/connectors/source-file-secure/requirements.txt +++ b/airbyte-integrations/connectors/source-file-secure/requirements.txt @@ -1,4 +1,3 @@ -# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. --e ../../bases/airbyte-protocol --e ../../bases/base-python +-e ../../bases/source-acceptance-test -e ../source-file +-e . diff --git a/airbyte-integrations/connectors/source-file-secure/setup.py b/airbyte-integrations/connectors/source-file-secure/setup.py index 15636060002a7..7345e937a1a5f 100644 --- a/airbyte-integrations/connectors/source-file-secure/setup.py +++ b/airbyte-integrations/connectors/source-file-secure/setup.py @@ -6,8 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-protocol", - "base-python", + "airbyte-cdk~=0.1", "gcsfs==0.7.1", "genson==1.2.2", "google-cloud-storage==1.35.0", @@ -31,7 +30,7 @@ ] setup( - name="source_file", + name="source_file_secure", description="Source implementation for File", author="Airbyte", author_email="contact@airbyte.io", diff --git a/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py b/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py index 323615f59d4aa..761a867576a78 100644 --- a/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py +++ b/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py @@ -6,12 +6,12 @@ import os import sys -from airbyte_protocol import ConnectorSpecification -from base_python.logger import AirbyteLogger - # some integration tests doesn't setup dependences from # requirements.txt file and Python can return a exception. 
# Thus we should to import this parent module manually +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.models import ConnectorSpecification + try: import source_file.source except ModuleNotFoundError: diff --git a/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py index 2c20878f20e9d..61568885be2b1 100644 --- a/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py @@ -3,7 +3,7 @@ # import pytest -from base_python.logger import AirbyteLogger +from airbyte_cdk import AirbyteLogger from source_file_secure import SourceFileSecure from source_file_secure.source import LOCAL_STORAGE_NAME diff --git a/airbyte-integrations/connectors/source-freshsales/.dockerignore b/airbyte-integrations/connectors/source-freshsales/.dockerignore new file mode 100644 index 0000000000000..02bb719d4c3ee --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_freshsales +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-freshsales/Dockerfile b/airbyte-integrations/connectors/source-freshsales/Dockerfile new file mode 100644 index 0000000000000..d7e7bc9102319 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_freshsales ./source_freshsales + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-freshsales diff --git a/airbyte-integrations/connectors/source-freshsales/README.md b/airbyte-integrations/connectors/source-freshsales/README.md new file mode 100644 index 0000000000000..01cfa7382c1c7 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/README.md @@ -0,0 +1,132 @@ +# Freshsales Source + +This is the repository for the Freshsales source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/freshsales). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. 
Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/freshsales) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_freshsales/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source freshsales test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-freshsales:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-freshsales:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-freshsales:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-freshsales:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-freshsales:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. 
+First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
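As with the Confluence connector earlier in this PR, a minimal `unit_tests/test_source.py` is a reasonable starting point. The sketch below is an assumption, not part of this diff: it presumes `SourceFreshsales` follows the same `AbstractSource` pattern and uses a placeholder assertion until the exact stream count is pinned down against `source_freshsales/source.py`:
```
from unittest.mock import MagicMock

from source_freshsales import SourceFreshsales


def test_streams():
    source = SourceFreshsales()
    config_mock = MagicMock()
    streams = source.streams(config_mock)
    # placeholder: replace with the exact number of streams once known
    assert len(streams) > 0
```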
diff --git a/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml b/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml new file mode 100644 index 0000000000000..d8678f3f24443 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-freshsales:dev +tests: + spec: + - spec_path: "source_freshsales/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["lost_deals", "won_deals"] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-freshsales/build.gradle b/airbyte-integrations/connectors/source-freshsales/build.gradle new file mode 100644 index 0000000000000..097ec61b07f86 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_freshsales' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py b/airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py new file mode 100644 index 0000000000000..108075487440f --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..470df870dfdbb --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json @@ -0,0 +1,94 @@ +{ + "streams": [ + { + "stream": { + "name": "contacts", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "accounts", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "open_deals", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "won_deals", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "lost_deals", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "open_tasks", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "completed_tasks", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "past_appointments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "upcoming_appointments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py new file mode 100644 index 0000000000000..2824cd4a16cdb --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +def test_dummy_test(): + """ this is the dummy test to pass integration tests step """ + pass diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..31299549a84b8 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json @@ -0,0 +1 @@ +{ "domain_name": "", "api_key": "ghiklmn" } diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json new file mode 100644 index 0000000000000..690c924091122 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json @@ -0,0 +1 @@ +{ "domain_name": "testabc.myfreshworks.com", "api_key": "ghiklmn" } diff --git a/airbyte-integrations/connectors/source-freshsales/main.py b/airbyte-integrations/connectors/source-freshsales/main.py new file mode 100644 index 0000000000000..3f4d8a1f45c41 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_freshsales import SourceFreshsales + +if __name__ == "__main__": + source = SourceFreshsales() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-freshsales/requirements.txt b/airbyte-integrations/connectors/source-freshsales/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-freshsales/setup.py b/airbyte-integrations/connectors/source-freshsales/setup.py new file mode 100644 index 0000000000000..2539849d0c679 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_freshsales", + description="Source implementation for Freshsales.", + author="Tuan Nguyen", + author_email="anhtuan.nguyen@me.com", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py b/airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py new file mode 100644 index 0000000000000..9061e6659822c --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceFreshsales + +__all__ = ["SourceFreshsales"] diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json new file mode 100644 index 0000000000000..d43ae83240c8d --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json @@ -0,0 +1,67 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "address": { "type": ["null", "string"] }, + "city": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] }, + "zipcode": { "type": ["null", "string"] }, + "country": { "type": ["null", "string"] }, + "industry_type_id": { "type": ["null", "integer"] }, + "business_type_id": { "type": ["null", "integer"] }, + "number_of_employees": { "type": ["null", "integer"] }, + "annual_revenue": { "type": ["null", "number"] }, + "website": { "type": ["null", "string"] }, + "phone": { "type": ["null", "string"] }, + "owner_id": { "type": ["null", "integer"] }, + "facebook": { "type": ["null", "string"] }, + "twitter": { "type": ["null", "string"] }, + "linkedin": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "parent_sales_account_id": { "type": ["null", "integer"] }, + "first_name": { "type": ["null", "string"] }, + "last_name": { "type": ["null", "string"] }, + "display_name": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "job_title": { "type": ["null", "string"] }, + "email": { "type": ["null", "string"] }, + "emails": { "type": ["null", "array"] }, + "time_zone": { "type": ["null", "string"] }, + "work_number": { "type": ["null", "string"] }, + "mobile_number": { "type": ["null", "string"] }, + "last_seen": { "type": ["null", "string"] }, + "lead_score": { "type": ["null", "integer"] }, + "last_contacted": { "type": ["null", "string"] }, + "open_deals_amount": { "type": ["null", "number"] }, + "won_deals_amount": { "type": ["null", "number"] }, + "links": { "type": ["null", "object"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "keyword": { "type": ["null", "string"] }, + "medium": { "type": ["null", "string"] }, + "last_contacted_mode": { "type": ["null", "string"] }, + "recent_note": { "type": ["null", "string"] }, + "won_deals_count": { "type": ["null", "integer"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_ids": { "type": ["null", "array"] }, + "open_deals_count": { "type": ["null", "integer"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "array"] }, + "external_id": { "type": ["null", "string"] }, + "work_email": { "type": ["null", "string"] }, + "subscription_status": { "type": ["null", "integer"] }, + "subscription_types": { "type": ["null", "string"] }, + "customer_fit": { "type": ["null", "string"] }, + "whatsapp_subscription_status": { "type": ["null", "string"] }, + 
"phone_numbers": { "type": ["null", "array"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json new file mode 100644 index 0000000000000..dfdcb18586e09 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "due_date": { "type": ["null", "string"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { "type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "owner_id": { "type": ["null", "integer"] }, + "status": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "outcome_id": { "type": ["null", "integer"] }, + "task_type_id": { "type": ["null", "integer"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json new file mode 100644 index 0000000000000..e7cc9ffc16273 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json @@ -0,0 +1,65 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "first_name": { "type": ["null", "string"] }, + "last_name": { "type": ["null", "string"] }, + "subscription_status": { "type": ["null", "string"] }, + "job_title": { "type": ["null", "string"] }, + "email": { "type": ["null", "string"] }, + "emails": { "type": ["null", "string"] }, + "work_number": { "type": ["null", "string"] }, + "external_id": { "type": ["null", "string"] }, + "mobile_number": { "type": ["null", "string"] }, + "address": { "type": ["null", "string"] }, + "city": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] }, + "zipcode": { "type": ["null", "string"] }, + "country": { "type": ["null", "string"] }, + "sales_accounts": { "type": ["null", "array"] }, + "territory_id": { "type": ["null", "integer"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "subscription_types": { "type": ["null", "string"] }, + "medium": { "type": ["null", "string"] }, + "campaign_id": { "type": ["null", "integer"] }, + "keyword": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] }, + "facebook": { "type": ["null", "string"] }, + "twitter": { "type": ["null", "string"] }, + "linkedin": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "contact_status_id": { "type": ["null", "integer"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "lifecycle_stage_id": { "type": ["null", "integer"] }, + "display_name": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "last_seen": { "type": ["null", "string"] }, + "lead_score": { "type": ["null", "integer"] }, + "last_contacted": { "type": ["null", "string"] }, + "open_deals_amount": { 
"type": ["null", "number"] }, + "won_deals_amount": { "type": ["null", "number"] }, + "links": { "type": ["null", "object"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "last_contacted_mode": { "type": ["null", "string"] }, + "recent_note": { "type": ["null", "string"] }, + "won_deals_count": { "type": ["null", "integer"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_ids": { "type": ["null", "string"] }, + "open_deals_count": { "type": ["null", "integer"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "string"] }, + "work_email": { "type": ["null", "string"] }, + "customer_fit": { "type": ["null", "integer"] }, + "whatsapp_subscription_status": { "type": ["null", "integer"] }, + "phone_numbers": { "type": ["null", "array"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json new file mode 100644 index 0000000000000..625e5823b30db --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "amount": { "type": ["null", "number"] }, + "currency_id": { "type": ["null", "integer"] }, + "base_currency_amount": { "type": ["null", "number"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "deal_stage_id": { "type": ["null", "integer"] }, + "deal_reason_id": { "type": ["null", "integer"] }, + "deal_type_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "expected_close": { "type": ["null", "string"] }, + "closed_date": { "type": ["null", "string"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "campaign_id": { "type": ["null", "integer"] }, + "deal_product_id": { "type": ["null", "integer"] }, + "deal_payment_status_id": { "type": ["null", "integer"] }, + "probability": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "deal_pipeline_id": { "type": "integer" }, + "stage_updated_time": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "age": { "type": ["null", "integer"] }, + "links": { "type": ["null", "object"] }, + "recent_note": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_id": { "type": ["null", "integer"] }, + "upcoming_activities_time": { "type": ["null", "string"] }, + "collaboration": { "type": ["null", "object"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "expected_deal_value": { "type": ["null", "number"] }, + "is_deleted": { "type": ["null", "boolean"] }, + 
"team_user_ids": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "fc_widget_collaboration": { "type": ["null", "object"] }, + "forecast_category": { "type": ["null", "integer"] }, + "deal_prediction_last_updated_at": { "type": ["null", "string"] }, + "rotten_days": { "type": ["null", "integer"] }, + "has_products": { "type": ["null", "boolean"] }, + "products": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json new file mode 100644 index 0000000000000..625e5823b30db --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "amount": { "type": ["null", "number"] }, + "currency_id": { "type": ["null", "integer"] }, + "base_currency_amount": { "type": ["null", "number"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "deal_stage_id": { "type": ["null", "integer"] }, + "deal_reason_id": { "type": ["null", "integer"] }, + "deal_type_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "expected_close": { "type": ["null", "string"] }, + "closed_date": { "type": ["null", "string"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "campaign_id": { "type": ["null", "integer"] }, + "deal_product_id": { "type": ["null", "integer"] }, + "deal_payment_status_id": { "type": ["null", "integer"] }, + "probability": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "deal_pipeline_id": { "type": "integer" }, + "stage_updated_time": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "age": { "type": ["null", "integer"] }, + "links": { "type": ["null", "object"] }, + "recent_note": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_id": { "type": ["null", "integer"] }, + "upcoming_activities_time": { "type": ["null", "string"] }, + "collaboration": { "type": ["null", "object"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "expected_deal_value": { "type": ["null", "number"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "fc_widget_collaboration": { "type": ["null", "object"] }, + "forecast_category": { "type": ["null", "integer"] }, + "deal_prediction_last_updated_at": { "type": ["null", "string"] }, + "rotten_days": { "type": ["null", "integer"] }, + "has_products": { "type": ["null", "boolean"] }, + "products": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json new file mode 100644 index 0000000000000..dfdcb18586e09 --- 
/dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "due_date": { "type": ["null", "string"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { "type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "owner_id": { "type": ["null", "integer"] }, + "status": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "outcome_id": { "type": ["null", "integer"] }, + "task_type_id": { "type": ["null", "integer"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json new file mode 100644 index 0000000000000..29117a8d9fdf4 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "from_date": { "type": ["null", "string"] }, + "date": { "type": ["null", "string"] }, + "Start": { "type": ["null", "string"] }, + "end_date": { "type": ["null", "string"] }, + "End": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { "type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "location": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "is_allday": { "type": ["null", "string"] }, + "appointment_attendees_attributes": { "type": ["null", "array"] }, + "outcome_id": { "type": ["null", "integer"] }, + "latitude": { "type": ["null", "string"] }, + "longitude": { "type": ["null", "string"] }, + "checkedin_at": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json new file mode 100644 index 0000000000000..29117a8d9fdf4 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "from_date": { "type": ["null", "string"] }, + "date": { "type": ["null", "string"] }, + "Start": { "type": ["null", "string"] }, + "end_date": { "type": ["null", "string"] }, + "End": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { 
"type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "location": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "is_allday": { "type": ["null", "string"] }, + "appointment_attendees_attributes": { "type": ["null", "array"] }, + "outcome_id": { "type": ["null", "integer"] }, + "latitude": { "type": ["null", "string"] }, + "longitude": { "type": ["null", "string"] }, + "checkedin_at": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json new file mode 100644 index 0000000000000..625e5823b30db --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "amount": { "type": ["null", "number"] }, + "currency_id": { "type": ["null", "integer"] }, + "base_currency_amount": { "type": ["null", "number"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "deal_stage_id": { "type": ["null", "integer"] }, + "deal_reason_id": { "type": ["null", "integer"] }, + "deal_type_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "expected_close": { "type": ["null", "string"] }, + "closed_date": { "type": ["null", "string"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "campaign_id": { "type": ["null", "integer"] }, + "deal_product_id": { "type": ["null", "integer"] }, + "deal_payment_status_id": { "type": ["null", "integer"] }, + "probability": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "deal_pipeline_id": { "type": "integer" }, + "stage_updated_time": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "age": { "type": ["null", "integer"] }, + "links": { "type": ["null", "object"] }, + "recent_note": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_id": { "type": ["null", "integer"] }, + "upcoming_activities_time": { "type": ["null", "string"] }, + "collaboration": { "type": ["null", "object"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "expected_deal_value": { "type": ["null", "number"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "fc_widget_collaboration": { "type": ["null", "object"] }, + "forecast_category": { "type": ["null", "integer"] }, + "deal_prediction_last_updated_at": { "type": ["null", "string"] }, + "rotten_days": { "type": ["null", "integer"] }, + "has_products": { "type": ["null", "boolean"] }, + "products": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py 
b/airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py new file mode 100644 index 0000000000000..974114398ffc6 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py @@ -0,0 +1,246 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer + + +# Basic full refresh stream +class FreshsalesStream(HttpStream, ABC): + url_base = "https://{}/crm/sales/api/" + primary_key = "id" + order_field = "updated_at" + transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) + + def __init__(self, domain_name: str, **kwargs): + super().__init__(**kwargs) + self.url_base = self.url_base.format(domain_name) + self.domain_name = domain_name + self.page = 1 + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """ + There is no next page token in the response, so we keep incrementing the page param until a page comes back empty. + """ + list_result = response.json().get(self.object_name, []) + if list_result: + self.page += 1 + return self.page + else: + return None + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = {"page": self.page, "sort": self.order_field, "sort_type": "asc"} + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + records = json_response.get(self.object_name, []) if self.object_name is not None else json_response + yield from records + + def _get_filters(self) -> List: + """ + Some streams require a filter_id to be passed in. This function gets all available filters. + """ + filters_url = f"https://{self.domain_name}/crm/sales/api/{self.object_name}/filters" + auth = self.authenticator.get_auth_header() + + try: + r = requests.get(filters_url, headers=auth) + r.raise_for_status() + return r.json().get("filters") + except requests.exceptions.RequestException as e: + raise e + + def get_view_id(self): + """ + This function iterates over all available filters and returns the relevant filter_id.
+ """ + if hasattr(self, "filter_name"): + filters = self._get_filters() + return next(filter["id"] for filter in filters if filter["name"] == self.filter_name) + else: + return + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + view_id = self.get_view_id() + return f"{self.object_name}/view/{view_id}" + + +class Contacts(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#contacts + """ + + object_name = "contacts" + filter_name = "All Contacts" + + +class Accounts(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#accounts + """ + + object_name = "sales_accounts" + filter_name = "All Accounts" + + +class Deals(FreshsalesStream): + object_name = "deals" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + records = json_response.get(self.object_name, []) if self.object_name is not None else json_response + # This is to remove data form widget development. Keeping this in failed integration tests. + for record in records: + record.pop("fc_widget_collaboration", None) + yield from records + + +class OpenDeals(Deals): + """ + API docs: https://developers.freshworks.com/crm/api/#deals + """ + + filter_name = "Open Deals" + + +class WonDeals(Deals): + """ + API docs: https://developers.freshworks.com/crm/api/#deals + """ + + filter_name = "Won Deals" + + +class LostDeals(Deals): + """ + API docs: https://developers.freshworks.com/crm/api/#deals + """ + + filter_name = "Lost Deals" + + +class OpenTasks(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#tasks + """ + + object_name = "tasks" + filter_value = "open" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +class CompletedTasks(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#tasks + """ + + object_name = "tasks" + filter_value = "completed" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +class PastAppointments(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#appointments + """ + + object_name = "appointments" + filter_value = "past" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) 
-> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +class UpcomingAppointments(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#appointments + """ + + object_name = "appointments" + filter_value = "upcoming" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +# Source +class SourceFreshsales(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + auth = TokenAuthenticator(token=f'token={config["api_key"]}', auth_method="Token").get_auth_header() + url = f'https://{config["domain_name"]}/crm/sales/api/contacts/filters' + try: + session = requests.get(url, headers=auth) + session.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = TokenAuthenticator(token=f'token={config["api_key"]}', auth_method="Token") + args = {"authenticator": auth, "domain_name": config["domain_name"]} + return [ + Contacts(**args), + Accounts(**args), + OpenDeals(**args), + WonDeals(**args), + LostDeals(**args), + OpenTasks(**args), + CompletedTasks(**args), + PastAppointments(**args), + UpcomingAppointments(**args), + ] diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json new file mode 100644 index 0000000000000..f4155198bc275 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json @@ -0,0 +1,22 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Freshsales Spec", + "type": "object", + "required": ["domain_name", "api_key"], + "additionalProperties": false, + "properties": { + "domain_name": { + "type": "string", + "description": "Freshsales domain", + "examples": ["mydomain.myfreshworks.com"] + }, + "api_key": { + "type": "string", + "description": "Your API Access Key. See here. The key is case sensitive.", + "airbyte_secret": true + } + } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py new file mode 100644 index 0000000000000..d03c2820311d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import json + +import pytest + + +@pytest.fixture(scope="session", name="config") +def config_fixture(): + with open("secrets/config.json", "r") as config_file: + return json.load(config_file) diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py new file mode 100644 index 0000000000000..132f3c417ad17 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_freshsales.source import SourceFreshsales + + +def test_check_connection(mocker, config): + source = SourceFreshsales() + logger_mock = MagicMock() + assert source.check_connection(logger_mock, config) == (True, None) + + +def test_count_streams(mocker): + source = SourceFreshsales() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 9 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-google-directory/.dockerignore b/airbyte-integrations/connectors/source-google-directory/.dockerignore index 99638fb7b66de..543110c087f09 100644 --- a/airbyte-integrations/connectors/source-google-directory/.dockerignore +++ b/airbyte-integrations/connectors/source-google-directory/.dockerignore @@ -1,6 +1,6 @@ * !Dockerfile -!Dockerfile.test +!main.py !source_google_directory !setup.py !secrets diff --git a/airbyte-integrations/connectors/source-google-directory/Dockerfile b/airbyte-integrations/connectors/source-google-directory/Dockerfile index 65af5d8719f9a..cc6e9f2f1a690 100644 --- a/airbyte-integrations/connectors/source-google-directory/Dockerfile +++ b/airbyte-integrations/connectors/source-google-directory/Dockerfile @@ -1,18 +1,38 @@ -FROM airbyte/integration-base-python:0.1.1 +FROM python:3.7.11-alpine3.14 as base -# Bash is installed for more convenient debugging. -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base -ENV CODE_PATH="source_google_directory" -ENV AIRBYTE_IMPL_MODULE="source_google_directory" -ENV AIRBYTE_IMPL_PATH="SourceGoogleDirectory" -WORKDIR /airbyte/integration_code -COPY $CODE_PATH ./$CODE_PATH COPY setup.py ./ -RUN pip install ".[main]" +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. 
+RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_google_directory ./source_google_directory -ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh" +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.8 LABEL io.airbyte.name=airbyte/source-google-directory diff --git a/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml new file mode 100644 index 0000000000000..8d3a0596d25d1 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml @@ -0,0 +1,28 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-google-directory:dev +tests: + spec: + - spec_path: "source_google_directory/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "secrets/config_oauth.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + - config_path: "integration_tests/invalid_config_oauth.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + # The API returns a different lastLoginTime for some users, and etag is generated from all of the data, so these fields can also differ between reads + ignored_fields: + "users": ["etag", "lastLoginTime"] diff --git a/airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build .
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-google-directory/build.gradle b/airbyte-integrations/connectors/source-google-directory/build.gradle index d5273dc9cd9b1..cc749b14afc54 100644 --- a/airbyte-integrations/connectors/source-google-directory/build.gradle +++ b/airbyte-integrations/connectors/source-google-directory/build.gradle @@ -1,22 +1,9 @@ plugins { id 'airbyte-python' id 'airbyte-docker' - id 'airbyte-standard-source-test-file' + id 'airbyte-source-acceptance-test' } airbytePython { moduleDirectory 'source_google_directory' } - -airbyteStandardSourceTestFile { - // For more information on standard source tests, see https://docs.airbyte.io/connector-development/testing-connectors - specPath = "source_google_directory/spec.json" - configPath = "secrets/config.json" - configuredCatalogPath = "sample_files/configured_catalog.json" -} - - -dependencies { - implementation files(project(':airbyte-integrations:bases:base-standard-source-test-file').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) -} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py b/airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py new file mode 100644 index 0000000000000..108075487440f --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..d8a60dea13653 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json @@ -0,0 +1,34 @@ +{ + "streams": [ + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "groups", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "group_members", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..2135fa0a5cb24 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "credentials_json": "{}", + "email": "test@test.test" +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json new file mode 100644 index 0000000000000..ade2aa38d0116 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json @@ -0,0 +1,7 @@ +{ + "credentials": { + "client_id": "", + "client_secret": "", + "refresh_token": "" + } +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json new file mode 100644 index 0000000000000..229ddbd5e6043 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json @@ -0,0 +1,6 @@ +{ + "credentials": { + "credentials_json": "", + "email": "test@test.test" + } +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json new file mode 100644 index 0000000000000..baca9caa3c4c2 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json @@ -0,0 +1,7 @@ +{ + "credentials": { + "client_id": "", + "client_secret": "", + "refresh_token": "" + } +} diff --git a/airbyte-integrations/connectors/source-google-directory/main_dev.py b/airbyte-integrations/connectors/source-google-directory/main.py similarity index 84% rename from airbyte-integrations/connectors/source-google-directory/main_dev.py rename to 
airbyte-integrations/connectors/source-google-directory/main.py index bd3852b046f60..b8f3ffa6715cb 100644 --- a/airbyte-integrations/connectors/source-google-directory/main_dev.py +++ b/airbyte-integrations/connectors/source-google-directory/main.py @@ -5,7 +5,7 @@ import sys -from base_python.entrypoint import launch +from airbyte_cdk.entrypoint import launch from source_google_directory import SourceGoogleDirectory if __name__ == "__main__": diff --git a/airbyte-integrations/connectors/source-google-directory/requirements.txt b/airbyte-integrations/connectors/source-google-directory/requirements.txt index 76af767f3755a..0411042aa0911 100644 --- a/airbyte-integrations/connectors/source-google-directory/requirements.txt +++ b/airbyte-integrations/connectors/source-google-directory/requirements.txt @@ -1,4 +1,2 @@ --e ../../bases/airbyte-protocol --e ../../bases/base-python --e ../../bases/base-python-test +-e ../../bases/source-acceptance-test -e . diff --git a/airbyte-integrations/connectors/source-google-directory/setup.py b/airbyte-integrations/connectors/source-google-directory/setup.py index b2b989d84ed13..31545b105515f 100644 --- a/airbyte-integrations/connectors/source-google-directory/setup.py +++ b/airbyte-integrations/connectors/source-google-directory/setup.py @@ -5,27 +5,29 @@ from setuptools import find_packages, setup +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", + "google-api-python-client==1.12.8", + "google-auth-httplib2==0.0.4", + "google-auth-oauthlib==0.4.2", + "backoff==1.10.0", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + setup( name="source_google_directory", description="Source implementation for Google Directory.", author="Airbyte", author_email="contact@airbyte.io", packages=find_packages(), - install_requires=[ - "airbyte-protocol", - "base-python", - "google-api-python-client==1.12.8", - "google-auth-httplib2==0.0.4", - "google-auth-oauthlib==0.4.2", - "backoff==1.10.0", - ], - package_data={"": ["*.json", "schemas/*.json"]}, - setup_requires=["pytest-runner"], - tests_require=["pytest"], + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, extras_require={ - # Dependencies required by the main package but not integration tests should go in main. Deps required by - # integration tests but not the main package go in tests. Deps required by both should go in - # install_requires. 
- "tests": ["airbyte-python-test", "pytest"], + "tests": TEST_REQUIREMENTS, }, ) diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py index 4eaee6953a818..f3a5ba0ab4154 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py @@ -6,11 +6,13 @@ import json from abc import ABC, abstractmethod from functools import partial -from typing import Callable, Dict, Iterator, Sequence +from typing import Any, Callable, Dict, Iterator, Mapping, Sequence import backoff +from google.auth.transport.requests import Request from google.oauth2 import service_account -from googleapiclient.discovery import Resource, build +from google.oauth2.credentials import Credentials +from googleapiclient.discovery import build from googleapiclient.errors import HttpError as GoogleApiHttpError from .utils import rate_limit_handling @@ -19,29 +21,51 @@ class API: - def __init__(self, credentials_json: str, email: str): + def __init__(self, credentials: Mapping[str, Any]): self._creds = None - self._credentials_json = credentials_json - self._admin_email = email + self._raw_credentials = credentials + self._service = None - def _load_account_info(self) -> Dict: - account_info = json.loads(self._credentials_json) + @staticmethod + def _load_account_info(credentials_json: str) -> Dict: + account_info = json.loads(credentials_json) return account_info - def _obtain_creds(self) -> service_account.Credentials: - account_info = self._load_account_info() + def _obtain_service_account_creds(self) -> service_account.Credentials: + """Obtaining creds based on Service account scenario""" + credentials_json = self._raw_credentials.get("credentials_json") + admin_email = self._raw_credentials.get("email") + account_info = self._load_account_info(credentials_json) creds = service_account.Credentials.from_service_account_info(account_info, scopes=SCOPES) - self._creds = creds.with_subject(self._admin_email) - - def _construct_resource(self) -> Resource: + self._creds = creds.with_subject(admin_email) + + def _obtain_web_app_creds(self) -> Credentials: + """Obtaining creds based on Web server application scenario""" + info = { + "client_id": self._raw_credentials.get("client_id"), + "client_secret": self._raw_credentials.get("client_secret"), + "refresh_token": self._raw_credentials.get("refresh_token"), + } + creds = Credentials.from_authorized_user_info(info) + if creds.expired: + creds.refresh(Request()) + self._creds = creds + + def _obtain_creds(self): + if "credentials_json" in self._raw_credentials: + self._obtain_service_account_creds() + elif "client_id" and "client_secret" in self._raw_credentials: + self._obtain_web_app_creds() + + def _construct_resource(self): if not self._creds: self._obtain_creds() - service = build("admin", "directory_v1", credentials=self._creds) - return service + if not self._service: + self._service = build("admin", "directory_v1", credentials=self._creds) def _get_resource(self, name: str): - service = self._construct_resource() - return getattr(service, name) + self._construct_resource() + return getattr(self._service, name) @backoff.on_exception(backoff.expo, GoogleApiHttpError, max_tries=7, giveup=rate_limit_handling) def get(self, name: str, params: Dict = None) -> Dict: diff --git 
a/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py index 2e5faa7cb9317..086e51c1077bb 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py @@ -5,14 +5,17 @@ from typing import Any, Mapping, Tuple -from base_python import BaseClient +from airbyte_cdk.sources.deprecated.client import BaseClient from .api import API, GroupMembersAPI, GroupsAPI, UsersAPI class Client(BaseClient): - def __init__(self, credentials_json: str, email: str): - self._api = API(credentials_json, email) + def __init__(self, credentials: Mapping[str, Any] = None, credentials_json: str = None, email: str = None): + # supporting old config format + if not credentials: + credentials = {"credentials_json": credentials_json, "email": email} + self._api = API(credentials) self._apis = {"users": UsersAPI(self._api), "groups": GroupsAPI(self._api), "group_members": GroupMembersAPI(self._api)} super().__init__() diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json index bd52c904a89ea..1359c3cea555b 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json @@ -3,19 +3,19 @@ "type": "object", "properties": { "kind": { - "type": "string" + "type": ["null", "string"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "email": { - "type": "string" + "type": ["null", "string"] }, "role": { - "type": "string" + "type": ["null", "string"] }, "type": { - "type": "string" + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json index cea499bdf2a28..09ef95af9dbed 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json @@ -3,28 +3,28 @@ "type": "object", "properties": { "kind": { - "type": "string" + "type": ["null", "string"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "etag": { - "type": "string" + "type": ["null", "string"] }, "email": { - "type": "string" + "type": ["null", "string"] }, "name": { - "type": "string" + "type": ["null", "string"] }, "directMembersCount": { - "type": "string" + "type": ["null", "string"] }, "description": { - "type": "string" + "type": ["null", "string"] }, "adminCreated": { - "type": "boolean" + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json index a90e989c66032..3350edc42db65 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json @@ -3,189 +3,169 @@ 
"type": "object", "properties": { "kind": { - "type": "string" + "type": ["null", "string"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "username": { - "type": "string" + "type": ["null", "string"] }, "name": { - "type": "object", + "type": ["null", "object"], "properties": { "givenName": { - "type": "string" + "type": ["null", "string"] }, "familyName": { - "type": "string" + "type": ["null", "string"] }, "fullName": { - "type": "string" + "type": ["null", "string"] } - }, - "required": ["givenName", "familyName", "fullName"] + } }, "isAdmin": { - "type": "boolean" + "type": ["null", "boolean"] }, "isDelegatedAdmin": { - "type": "boolean" + "type": ["null", "boolean"] }, "lastLoginTime": { - "type": "string" + "type": ["null", "string"] }, "creationTime": { - "type": "string" + "type": ["null", "string"] }, "agreedToTerms": { - "type": "boolean" + "type": ["null", "boolean"] }, "hashFunction": { - "type": "string" + "type": ["null", "string"] }, "suspended": { - "type": "boolean" + "type": ["null", "boolean"] }, "changePasswordAtNextLogin": { - "type": "boolean" + "type": ["null", "boolean"] }, "ipWhitelisted": { - "type": "boolean" + "type": ["null", "boolean"] }, "emails": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "address": { - "type": "string" - }, - "type": { - "type": "string" - }, - "customType": { - "type": "string" - }, - "primary": { - "type": "boolean" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "address": { + "type": ["null", "string"] }, - "required": ["address", "type", "customType", "primary"] + "type": { + "type": ["null", "string"] + }, + "customType": { + "type": ["null", "string"] + }, + "primary": { + "type": ["null", "boolean"] + } } - ] + } }, "externalIds": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "type": { - "type": "string" - }, - "customType": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] }, - "required": ["value", "type", "customType"] + "customType": { + "type": ["null", "string"] + } } - ] + } }, "relations": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "type": { - "type": "string" - }, - "customType": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] }, - "required": ["value", "type", "customType"] + "type": { + "type": ["null", "string"] + }, + "customType": { + "type": ["null", "string"] + } } - ] + } }, "organizations": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "title": { - "type": "string" - }, - "primary": { - "type": "boolean" - }, - "customType": { - "type": "string" - }, - "description": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "name": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] }, - "required": ["name", "title", "primary", "customType", "description"] + "primary": { + "type": ["null", "boolean"] + }, + "customType": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + } } - ] + } }, "phones": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - 
"value": { - "type": "string" - }, - "type": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] }, - "required": ["value", "type"] + "type": { + "type": ["null", "string"] + } } - ] + } }, "aliases": { - "type": "array", - "items": [ - { - "type": "string" - } - ] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "nonEditableAliases": { - "type": "array", - "items": [ - { - "type": "string" - } - ] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "customerId": { - "type": "string" + "type": ["null", "string"] }, "orgUnitPath": { - "type": "string" + "type": ["null", "string"] }, "isMailboxSetup": { - "type": "boolean" + "type": ["null", "boolean"] }, "includeInGlobalAddressList": { - "type": "boolean" + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py index 13fbb18ed93d2..98762996cd345 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py @@ -3,7 +3,7 @@ # -from base_python import BaseSource +from airbyte_cdk.sources.deprecated.base_source import BaseSource from .client import Client diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json index c848cdde6dae0..5b59a13256374 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json @@ -4,18 +4,87 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Google Directory Spec", "type": "object", - "required": ["credentials_json", "email"], - "additionalProperties": false, + "required": [], + "additionalProperties": true, "properties": { - "credentials_json": { - "type": "string", - "description": "The contents of the JSON service account key. See the docs for more information on how to generate this key.", - "airbyte_secret": true - }, - "email": { - "type": "string", - "description": "The email of the user, which has permissions to access the Google Workspace Admin APIs." + "credentials": { + "title": "Google Credentials", + "description": "Google APIs use the OAuth 2.0 protocol for authentication and authorization. 
The Source supports Web server application and Service accounts scenarios", + "type": "object", + "oneOf": [ + { + "title": "Sign in via Google (Oauth)", + "description": "For these scenario user only needs to give permission to read Google Directory data", + "type": "object", + "required": ["client_id", "client_secret", "refresh_token"], + "properties": { + "credentials_title": { + "type": "string", + "title": "Credentials title", + "description": "Authentication scenario", + "const": "Web server app", + "enum": ["Web server app"], + "default": "Web server app", + "order": 0 + }, + "client_id": { + "title": "Client ID", + "type": "string", + "description": "The client ID of developer application", + "airbyte_secret": true + }, + "client_secret": { + "title": "Client secret", + "type": "string", + "description": "The client secret of developer application", + "airbyte_secret": true + }, + "refresh_token": { + "title": "Refresh Token", + "type": "string", + "description": "The token for obtaining new access token", + "airbyte_secret": true + } + } + }, + { + "title": "Service account Key", + "description": "For these scenario user should obtain service account's credentials from the Google API Console and provide delegated email", + "type": "object", + "required": ["credentials_json", "email"], + "properties": { + "credentials_title": { + "type": "string", + "title": "Credentials title", + "description": "Authentication scenario", + "const": "Service accounts", + "enum": ["Service accounts"], + "default": "Service accounts", + "order": 0 + }, + "credentials_json": { + "type": "string", + "title": "Credentials JSON", + "description": "The contents of the JSON service account key. See the docs for more information on how to generate this key.", + "airbyte_secret": true + }, + "email": { + "type": "string", + "title": "Email", + "description": "The email of the user, which has permissions to access the Google Workspace Admin APIs." + } + } + } + ] } } + }, + "authSpecification": { + "auth_type": "oauth2.0", + "oauth2Specification": { + "rootObject": ["credentials", 0], + "oauthFlowInitParameters": [["client_id"], ["client_secret"]], + "oauthFlowOutputParameters": [["refresh_token"]] + } } } diff --git a/airbyte-integrations/connectors/source-greenhouse/Dockerfile b/airbyte-integrations/connectors/source-greenhouse/Dockerfile index 7a8111abed8c5..267a7cdc85dc6 100644 --- a/airbyte-integrations/connectors/source-greenhouse/Dockerfile +++ b/airbyte-integrations/connectors/source-greenhouse/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.5 +LABEL io.airbyte.version=0.2.6 LABEL io.airbyte.name=airbyte/source-greenhouse diff --git a/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml b/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml index e590b104696cf..3c111edb3767e 100644 --- a/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml @@ -16,8 +16,7 @@ tests: - config_path: "secrets/config_users_only.json" basic_read: - config_path: "secrets/config.json" - # TODO: replace with configured_catalog.json when https://github.com/airbytehq/airbyte/issues/6546 is resolved - configured_catalog_path: "integration_tests/configured_catalog_no_demographics.json" + configured_catalog_path: "integration_tests/configured_catalog.json" - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog_users_only.json" full_refresh: diff --git a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json b/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json index d73400945f18e..ab9dfc20ec80e 100644 --- a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json +++ b/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json @@ -152,6 +152,69 @@ }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_question_sets", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_questions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_answer_options", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_answers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "applications_demographics_answers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_question_sets_questions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_answers_answer_options", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" } ] } diff --git a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json b/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json deleted file mode 100644 index ee4f6c3b296ea..0000000000000 --- 
a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json +++ /dev/null @@ -1,177 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "applications", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "candidates", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "close_reasons", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "degrees", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "departments", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "job_posts", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "jobs", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "offers", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "scorecards", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "users", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "custom_fields", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "interviews", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "applications_interviews", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "sources", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "rejection_reasons", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "jobs_openings", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": 
"job_stages", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "jobs_stages", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py index 7496bc0a5db11..9f54797433930 100644 --- a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py +++ b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py @@ -11,11 +11,18 @@ from requests.auth import HTTPBasicAuth from source_greenhouse.streams import ( Applications, + ApplicationsDemographicsAnswers, ApplicationsInterviews, Candidates, CloseReasons, CustomFields, Degrees, + DemographicsAnswerOptions, + DemographicsAnswers, + DemographicsAnswersAnswerOptions, + DemographicsQuestions, + DemographicsQuestionSets, + DemographicsQuestionSetsQuestions, Departments, Interviews, JobPosts, @@ -62,6 +69,13 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: Scorecards(authenticator=auth), Sources(authenticator=auth), Users(authenticator=auth), + ApplicationsDemographicsAnswers(authenticator=auth), + DemographicsAnswers(authenticator=auth), + DemographicsAnswerOptions(authenticator=auth), + DemographicsQuestions(authenticator=auth), + DemographicsAnswersAnswerOptions(authenticator=auth), + DemographicsQuestionSets(authenticator=auth), + DemographicsQuestionSetsQuestions(authenticator=auth), ] return streams diff --git a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py index 49a3a9dff42a9..46f5112eea497 100644 --- a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py +++ b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py @@ -73,13 +73,13 @@ class Applications(GreenhouseStream): """ -class ApplicationsDemographicsAnswers(GreenhouseStream): +class ApplicationsDemographicsAnswers(GreenhouseSubStream, GreenhouseStream): """ Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answers """ - def path(self, **kwargs) -> str: - return "demographics/answers" + parent_stream = Applications + path_template = "applications/{parent_id}/demographics/answers" class ApplicationsInterviews(GreenhouseSubStream, GreenhouseStream): @@ -115,59 +115,58 @@ class Degrees(GreenhouseStream): """ -# TODO: uncomment when https://github.com/airbytehq/airbyte/issues/6546 is resolved -# class DemographicsAnswers(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answers -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/answers" -# -# -# class DemographicsAnswerOptions(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/answer_options" -# -# -# class DemographicsQuestions(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/questions" -# -# -# class DemographicsAnswersAnswerOptions(GreenhouseSubStream, GreenhouseStream): -# """ -# Docs: 
https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options-for-demographic-question -# """ -# -# parent_stream = DemographicsQuestions -# path_template = "demographics/questions/{parent_id}/answer_options" -# -# -# class DemographicsQuestionSets(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-question-sets -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/question_sets" -# -# -# class DemographicsQuestionSetsQuestions(GreenhouseSubStream, GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions-for-demographic-question-set -# """ -# -# parent_stream = DemographicsQuestionSets -# path_template = "demographics/question_sets/{parent_id}/questions" +class DemographicsAnswers(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answers + """ + + def path(self, **kwargs) -> str: + return "demographics/answers" + + +class DemographicsAnswerOptions(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options + """ + + def path(self, **kwargs) -> str: + return "demographics/answer_options" + + +class DemographicsQuestions(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions + """ + + def path(self, **kwargs) -> str: + return "demographics/questions" + + +class DemographicsAnswersAnswerOptions(GreenhouseSubStream, GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options-for-demographic-question + """ + + parent_stream = DemographicsQuestions + path_template = "demographics/questions/{parent_id}/answer_options" + + +class DemographicsQuestionSets(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-question-sets + """ + + def path(self, **kwargs) -> str: + return "demographics/question_sets" + + +class DemographicsQuestionSetsQuestions(GreenhouseSubStream, GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions-for-demographic-question-set + """ + + parent_stream = DemographicsQuestionSets + path_template = "demographics/question_sets/{parent_id}/questions" class Departments(GreenhouseStream): diff --git a/airbyte-integrations/connectors/source-hubspot/.dockerignore b/airbyte-integrations/connectors/source-hubspot/.dockerignore index 461b1bb7ee9e3..85586eba85c52 100644 --- a/airbyte-integrations/connectors/source-hubspot/.dockerignore +++ b/airbyte-integrations/connectors/source-hubspot/.dockerignore @@ -1,8 +1,6 @@ * !Dockerfile -!Dockerfile.test +!main.py !source_hubspot !setup.py !secrets -!acceptance-test-config.yml -!acceptance-test.sh diff --git a/airbyte-integrations/connectors/source-hubspot/.gitignore b/airbyte-integrations/connectors/source-hubspot/.gitignore deleted file mode 100644 index 29fffc6a50cc9..0000000000000 --- a/airbyte-integrations/connectors/source-hubspot/.gitignore +++ /dev/null @@ -1 +0,0 @@ -NEW_SOURCE_CHECKLIST.md diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index e2313033cfb08..5acc4f5ea5285 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -1,18 +1,38 @@ -FROM airbyte/integration-base-python:0.1.1 +FROM python:3.7.11-alpine3.14 as base -# 
Bash is installed for more convenient debugging. -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code -ENV CODE_PATH="source_hubspot" -ENV AIRBYTE_IMPL_MODULE="source_hubspot" -ENV AIRBYTE_IMPL_PATH="SourceHubspot" +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base -WORKDIR /airbyte/integration_code -COPY $CODE_PATH ./$CODE_PATH COPY setup.py ./ -RUN pip install . -ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh" +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_hubspot ./source_hubspot + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.23 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/build.gradle b/airbyte-integrations/connectors/source-hubspot/build.gradle index bdea51cec8a89..259bffb74a610 100644 --- a/airbyte-integrations/connectors/source-hubspot/build.gradle +++ b/airbyte-integrations/connectors/source-hubspot/build.gradle @@ -7,8 +7,3 @@ plugins { airbytePython { moduleDirectory 'source_hubspot' } - -dependencies { - implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) -} diff --git a/airbyte-integrations/connectors/source-hubspot/main_dev.py b/airbyte-integrations/connectors/source-hubspot/main.py similarity index 83% rename from airbyte-integrations/connectors/source-hubspot/main_dev.py rename to airbyte-integrations/connectors/source-hubspot/main.py index 6649d86ac6627..2d902157fd450 100644 --- a/airbyte-integrations/connectors/source-hubspot/main_dev.py +++ b/airbyte-integrations/connectors/source-hubspot/main.py @@ -5,7 +5,7 @@ import sys -from base_python.entrypoint import launch +from airbyte_cdk.entrypoint import launch from source_hubspot import SourceHubspot if __name__ == "__main__": diff --git a/airbyte-integrations/connectors/source-hubspot/requirements.txt b/airbyte-integrations/connectors/source-hubspot/requirements.txt index e74f41a28ce1b..7be17a56d745d 100644 --- a/airbyte-integrations/connectors/source-hubspot/requirements.txt +++ b/airbyte-integrations/connectors/source-hubspot/requirements.txt @@ -1,5 +1,3 @@ # This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. --e ../../bases/airbyte-protocol --e ../../bases/base-python -e ../../bases/source-acceptance-test -e . 
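
Note on the `main_dev.py` → `main.py` rename above: it moves the HubSpot connector onto the standard Airbyte CDK entrypoint (`airbyte_cdk.entrypoint.launch`) instead of `base_python.entrypoint`. A minimal sketch of what the resulting `main.py` typically looks like follows; the diff only shows the changed import and the `__main__` guard, so the two lines inside the guard are assumed from the usual CDK pattern rather than taken from this patch.

```python
#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

import sys

from airbyte_cdk.entrypoint import launch
from source_hubspot import SourceHubspot

if __name__ == "__main__":
    # Assumed body: build the source and pass the CLI args
    # (spec / check / discover / read) to the CDK launcher.
    source = SourceHubspot()
    launch(source, sys.argv[1:])
```
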
diff --git a/airbyte-integrations/connectors/source-hubspot/setup.py b/airbyte-integrations/connectors/source-hubspot/setup.py index d15d84cb55069..7c4c01fa6b730 100644 --- a/airbyte-integrations/connectors/source-hubspot/setup.py +++ b/airbyte-integrations/connectors/source-hubspot/setup.py @@ -7,8 +7,6 @@ MAIN_REQUIREMENTS = [ "airbyte-cdk~=0.1", - "airbyte-protocol", - "base-python", "backoff==1.11.1", "pendulum==2.1.2", "requests==2.26.0", diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 4ee4657241be4..48ec93e7b781d 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -14,8 +14,8 @@ import backoff import pendulum as pendulum import requests +from airbyte_cdk.entrypoint import logger from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator -from base_python.entrypoint import logger from source_hubspot.errors import HubspotAccessDenied, HubspotInvalidAuth, HubspotRateLimited, HubspotTimeout # The value is obtained experimentally, Hubspot allows the URL length up to ~16300 symbols, @@ -374,7 +374,7 @@ def parse_response(self, response: Union[Mapping[str, Any], List[dict]]) -> Iter 'message': 'This hapikey (....) does not have proper permissions! (requires any of [automation-access])', 'correlationId': '111111-2222-3333-4444-55555555555'} """ - logger.warn(f"Stream `{self.entity}` cannot be procced. {response.get('message')}") + logger.warning(f"Stream `{self.entity}` cannot be procced. {response.get('message')}") return if response.get(self.data_field) is None: diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index 6f6a2cf865ffe..6dd6ffb0c1dc5 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -5,8 +5,8 @@ from typing import Any, Callable, Iterator, Mapping, Optional, Tuple -from airbyte_protocol import AirbyteStream -from base_python import BaseClient +from airbyte_cdk.models import AirbyteStream +from airbyte_cdk.sources.deprecated.client import BaseClient from requests import HTTPError from source_hubspot.api import ( API, diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py index 660980307cfba..deed1d336c237 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py @@ -3,7 +3,7 @@ # -from base_python import BaseSource +from airbyte_cdk.sources.deprecated.base_source import BaseSource from .client import Client diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json index fbb98942a99c5..8a969205962b9 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json @@ -23,11 +23,9 @@ "type": "object", "title": "Authenticate via Hubspot (Oauth)", "required": [ - "redirect_uri", "client_id", "client_secret", "refresh_token", - "access_token", "credentials_title" ], "properties": { @@ -92,11 +90,7 @@ "auth_type": 
"oauth2.0", "oauth2Specification": { "rootObject": ["credentials", "0"], - "oauthFlowInitParameters": [ - ["client_id"], - ["client_secret"], - ["refresh_token"] - ], + "oauthFlowInitParameters": [["client_id"], ["client_secret"]], "oauthFlowOutputParameters": [["refresh_token"]] } } diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py index 113bc557df64d..f55391f49fec0 100644 --- a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py @@ -36,14 +36,15 @@ def test_field_type_format_converting(field_type, expected): (1, {"type": ["null", "string"]}), ], ) -def test_bad_field_type_converting(field_type, expected, capsys): +def test_bad_field_type_converting(field_type, expected, caplog, capsys): assert Stream._get_field_props(field_type=field_type) == expected - logs = capsys.readouterr().out + logs = caplog.records - assert '"WARN"' in logs - assert f"Unsupported type {field_type} found" in logs + assert logs + assert logs[0].levelname == "WARNING" + assert logs[0].msg == f"Unsupported type {field_type} found" @pytest.mark.parametrize( diff --git a/airbyte-integrations/connectors/source-iterable/Dockerfile b/airbyte-integrations/connectors/source-iterable/Dockerfile index 85e3f9b3f58c8..b24c983735c7c 100644 --- a/airbyte-integrations/connectors/source-iterable/Dockerfile +++ b/airbyte-integrations/connectors/source-iterable/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.9 +LABEL io.airbyte.version=0.1.11 LABEL io.airbyte.name=airbyte/source-iterable diff --git a/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh index c522eebbd94e8..4ceedd9e7ba03 100755 --- a/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh +++ b/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh @@ -1,7 +1,7 @@ #!/usr/bin/env sh # Build latest connector image -docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2):dev # Pull latest acctest image docker pull airbyte/source-acceptance-test:latest diff --git a/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json index a6392effd040c..e4a8426cc2d24 100644 --- a/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json @@ -170,6 +170,17 @@ }, "sync_mode": "incremental", "destination_sync_mode": "append" + }, + { + "stream": { + "name": "templates", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["createdAt"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" } ] } diff --git a/airbyte-integrations/connectors/source-iterable/setup.py b/airbyte-integrations/connectors/source-iterable/setup.py index 90f33d5300e34..893e468fb733b 100644 --- a/airbyte-integrations/connectors/source-iterable/setup.py +++ b/airbyte-integrations/connectors/source-iterable/setup.py @@ -11,7 +11,7 @@ "requests~=2.25", ] -TEST_REQUIREMENTS = ["pytest~=6.1"] +TEST_REQUIREMENTS = ["pytest~=6.1", "responses==0.13.3"] setup( @@ -20,6 +20,9 @@ author="Airbyte", author_email="contact@airbyte.io", packages=find_packages(), - install_requires=MAIN_REQUIREMENTS + TEST_REQUIREMENTS, + install_requires=MAIN_REQUIREMENTS, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, package_data={"": ["*.json", "schemas/*.json"]}, ) diff --git a/airbyte-integrations/connectors/source-iterable/source_iterable/api.py b/airbyte-integrations/connectors/source-iterable/source_iterable/api.py index ad8288de5f39b..d3d9eb77870d5 100755 --- a/airbyte-integrations/connectors/source-iterable/source_iterable/api.py +++ b/airbyte-integrations/connectors/source-iterable/source_iterable/api.py @@ -69,7 +69,7 @@ def __init__(self, start_date, **kwargs): self.stream_params = {"dataTypeName": self.data_field} def path(self, **kwargs) -> str: - return "/export/data.json" + return "export/data.json" @staticmethod def _field_to_datetime(value: Union[int, str]) -> pendulum.datetime: @@ -88,12 +88,8 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late """ latest_benchmark = latest_record[self.cursor_field] if current_stream_state.get(self.cursor_field): - return { - self.cursor_field: max( - latest_benchmark, self._field_to_datetime(current_stream_state[self.cursor_field]) - ).to_datetime_string() - } - return {self.cursor_field: latest_benchmark.to_datetime_string()} + return {self.cursor_field: str(max(latest_benchmark, self._field_to_datetime(current_stream_state[self.cursor_field])))} + return {self.cursor_field: str(latest_benchmark)} def request_params(self, stream_state: Mapping[str, Any], **kwargs) -> MutableMapping[str, Any]: @@ -114,6 +110,21 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp record[self.cursor_field] = self._field_to_datetime(record[self.cursor_field]) yield record + def request_kwargs( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Mapping[str, Any]: + """ + https://api.iterable.com/api/docs#export_exportDataJson + Sending those type of requests could download large piece of json + objects splitted 
with newline character. + Passing stream=True argument to requests.session.send method to avoid + loading whole analytics report content into memory. + """ + return {"stream": True} + class Lists(IterableStream): data_field = "lists" @@ -347,6 +358,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp records = response_json.get(self.data_field, []) for record in records: + record[self.cursor_field] = self._field_to_datetime(record[self.cursor_field]) yield record diff --git a/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json b/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json index 8ea961911c394..b4ad623658b4a 100644 --- a/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json +++ b/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json @@ -4,7 +4,8 @@ "type": ["null", "number"] }, "createdAt": { - "type": ["null", "integer"] + "type": ["null", "string"], + "format": "date-time" }, "updatedAt": { "type": ["null", "integer"] diff --git a/airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py b/airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py b/airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py new file mode 100644 index 0000000000000..4f151b6155156 --- /dev/null +++ b/airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py @@ -0,0 +1,42 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import json +from unittest import mock + +import pytest +import responses +from airbyte_cdk.models import SyncMode +from source_iterable.api import EmailSend + + +@pytest.fixture +def session_mock(): + with mock.patch("airbyte_cdk.sources.streams.http.http.requests") as requests_mock: + session_mock = mock.MagicMock() + response_mock = mock.MagicMock() + requests_mock.Session.return_value = session_mock + session_mock.send.return_value = response_mock + response_mock.status_code = 200 + yield session_mock + + +def test_send_email_stream(session_mock): + stream = EmailSend(start_date="2020", api_key="") + _ = list(stream.read_records(sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=[], stream_state={})) + + assert session_mock.send.called + send_args = session_mock.send.call_args[1] + assert send_args.get("stream") is True + + +@responses.activate +def test_stream_correct(): + record_js = {"createdAt": "2020"} + NUMBER_OF_RECORDS = 10 ** 2 + resp_body = "\n".join([json.dumps(record_js)] * NUMBER_OF_RECORDS) + responses.add("GET", "https://api.iterable.com/api/export/data.json", body=resp_body) + stream = EmailSend(start_date="2020", api_key="") + records = list(stream.read_records(sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=[], stream_state={})) + assert len(records) == NUMBER_OF_RECORDS diff --git a/airbyte-integrations/connectors/source-mixpanel/Dockerfile b/airbyte-integrations/connectors/source-mixpanel/Dockerfile index d77882fdd02dd..3f193290b7c53 100644 --- a/airbyte-integrations/connectors/source-mixpanel/Dockerfile +++ b/airbyte-integrations/connectors/source-mixpanel/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-mixpanel diff --git a/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml b/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml index 974e60c5e2591..a198296243479 100644 --- a/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml @@ -14,9 +14,11 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 3600 full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 3600 incremental: # incremental streams Funnels, Revenue, Export # Funnels - fails because it has complex state, like {'funnel_idX': {'date': 'dateX'}} @@ -29,4 +31,5 @@ tests: cursor_paths: revenue: ["date"] export: ["date"] + timeout_seconds: 3600 diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json index b31b1a29826a7..b530afc24fb93 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json @@ -5,6 +5,46 @@ "properties": { "distinct_id": { "type": ["null", "string"] + }, + "browser": { + "type": ["null", "string"] + }, + "browser_version": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "country_code": { + "type": ["null", "string"] + }, + 
"region": { + "type": ["null", "string"] + }, + "timezone": { + "type": ["null", "string"] + }, + "last_seen": { + "type": ["null", "string"], + "format": "date-time" + }, + "email": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "first_name": { + "type": ["null", "string"] + }, + "last_name": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "string"] + }, + "unblocked": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py index fc1ce3a78fa58..b0e88d8bed44e 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py @@ -27,13 +27,7 @@ class MixpanelStream(HttpStream, ABC): A maximum of 5 concurrent queries 400 queries per hour. - API Rate Limit Handler: - If total number of planned requests is lower than it is allowed per hour - then - reset reqs_per_hour_limit and send requests with small delay (1 reqs/sec) - because API endpoint accept requests bursts up to 3 reqs/sec - else - send requests with planned delay: 3600/reqs_per_hour_limit seconds + API Rate Limit Handler: after each request freeze for the time period: 3600/reqs_per_hour_limit seconds """ @property @@ -82,7 +76,7 @@ def _send_request(self, request: requests.PreparedRequest, request_kwargs: Mappi self.logger.error(f"Stream {self.name}: {e.response.status_code} {e.response.reason} - {error_message}") raise e - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: json_response = response.json() if self.data_field is not None: data = json_response.get(self.data_field, []) @@ -94,6 +88,11 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp for record in data: yield record + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + + # parse the whole response + yield from self.process_response(response, **kwargs) + # wait for X seconds to match API limitations time.sleep(3600 / self.reqs_per_hour_limit) @@ -190,10 +189,6 @@ def stream_slices( # add 1 additional day because date range is inclusive start_date = end_date + timedelta(days=1) - # reset reqs_per_hour_limit if we expect less requests (1 req per stream) than it is allowed by API reqs_per_hour_limit - if len(date_slices) < self.reqs_per_hour_limit: - self.reqs_per_hour_limit = 3600 # 1 query per sec - return date_slices def request_params( @@ -269,9 +264,6 @@ def stream_slices( for date_slice in date_slices: stream_slices.append({**funnel_slice, **date_slice}) - # reset reqs_per_hour_limit if we expect less requests (1 req per stream) than it is allowed by API reqs_per_hour_limit - if len(stream_slices) < self.reqs_per_hour_limit: - self.reqs_per_hour_limit = 3600 # queries per hour (1 query per sec) return stream_slices def request_params( @@ -288,7 +280,7 @@ def request_params( params["unit"] = "day" return params - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ response.json() example: { @@ -368,7 +360,7 @@ class EngageSchema(MixpanelStream): def path(self, **kwargs) -> str: return "engage/properties" - def parse_response(self, response: requests.Response, 
**kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ response.json() example: { @@ -444,7 +436,7 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, self._total = None return None - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ { "page": 0 @@ -523,7 +515,9 @@ def get_json_schema(self) -> Mapping[str, Any]: # from API: '$browser' # to stream: 'browser' property_name = property_name[1:] - schema["properties"][property_name] = types.get(property_type, {"type": ["null", "string"]}) + # Do not overwrite 'standard' hard-coded properties, add 'custom' properties + if property_name not in schema["properties"]: + schema["properties"][property_name] = types.get(property_type, {"type": ["null", "string"]}) return schema @@ -591,7 +585,7 @@ class Revenue(DateSlicesMixin, IncrementalMixpanelStream): def path(self, **kwargs) -> str: return "engage/revenue" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ response.json() example: { @@ -634,7 +628,7 @@ class ExportSchema(MixpanelStream): def path(self, **kwargs) -> str: return "events/properties/top" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[str]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[str]: """ response.json() example: { @@ -691,7 +685,7 @@ def url_base(self): def path(self, **kwargs) -> str: return "export" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """Export API return response.text in JSONL format but each line is a valid JSON object Raw item example: { @@ -737,9 +731,6 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp yield item - # wait for X seconds to meet API limitation - time.sleep(3600 / self.reqs_per_hour_limit) - def get_json_schema(self) -> Mapping[str, Any]: """ :return: A dict of the JSON schema representing this stream. diff --git a/airbyte-integrations/connectors/source-monday/.dockerignore b/airbyte-integrations/connectors/source-monday/.dockerignore new file mode 100644 index 0000000000000..dd9f9850865ef --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_monday +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-monday/Dockerfile b/airbyte-integrations/connectors/source-monday/Dockerfile new file mode 100644 index 0000000000000..bd7bf4c76adfb --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . 
+ +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_monday ./source_monday + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-monday diff --git a/airbyte-integrations/connectors/source-monday/README.md b/airbyte-integrations/connectors/source-monday/README.md new file mode 100644 index 0000000000000..b876f674886ea --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/README.md @@ -0,0 +1,132 @@ +# Monday Source + +This is the repository for the Monday source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/monday). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-monday:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/monday) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_monday/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source monday test creds` +and place them into `secrets/config.json`. 
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-monday:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-monday:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-monday:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-monday:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-monday:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-monday:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-monday:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-monday:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. 
+* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-monday/acceptance-test-config.yml b/airbyte-integrations/connectors/source-monday/acceptance-test-config.yml new file mode 100644 index 0000000000000..18d63f2b9cdf6 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-monday:dev +tests: + spec: + - spec_path: "source_monday/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["teams"] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-monday/build.gradle b/airbyte-integrations/connectors/source-monday/build.gradle new file mode 100644 index 0000000000000..3a1003739141c --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_monday' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/__init__.py b/airbyte-integrations/connectors/source-monday/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..52b0f2c2118f4 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py new file mode 100644 index 0000000000000..58c194c5d1376 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..a44855e5ae74a --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json @@ -0,0 +1,54 @@ +{ + "streams": [ + { + "stream": { + "name": "items", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "boards", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "teams", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "updates", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..801a78c2451c9 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "api_token": "abcd" +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json new file mode 100644 index 0000000000000..e14d519d95824 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "api_token": "12345abc" +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json new file mode 100644 index 0000000000000..3587e579822d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-monday/main.py b/airbyte-integrations/connectors/source-monday/main.py new file mode 100644 index 0000000000000..97c4bafc81010 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_monday import SourceMonday + +if __name__ == "__main__": + source = SourceMonday() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-monday/requirements.txt b/airbyte-integrations/connectors/source-monday/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-monday/setup.py b/airbyte-integrations/connectors/source-monday/setup.py new file mode 100644 index 0000000000000..fdb97c1c95179 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_monday", + description="Source implementation for Monday.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-monday/source_monday/__init__.py b/airbyte-integrations/connectors/source-monday/source_monday/__init__.py new file mode 100644 index 0000000000000..b2995ef1eb176 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceMonday + +__all__ = ["SourceMonday"] diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json new file mode 100644 index 0000000000000..196f77d9ef2e7 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json @@ -0,0 +1,82 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "board_kind": { "type": ["null", "string"] }, + "columns": { + "type": ["null", "array"], + "properties": { + "archived": { "type": ["null", "boolean"] }, + "id": { "type": ["null", "string"] }, + "settings_str": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] }, + "width": { "type": ["null", "integer"] } + } + }, + "communication": { "type": ["null", "object"] }, + "description": { "type": ["null", "string"] }, + "groups": { + "type": ["null", "array"], + "properties": { + "archived": { "type": ["null", "boolean"] }, + "color": { "type": ["null", "string"] }, + "deleted": { "type": ["null", "boolean"] }, + "id": { "type": ["null", "string"] }, + "position": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] } + } + }, + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "owner": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "permissions": { "type": ["null", "string"] }, + "pos": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] }, + "subscribers": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "tags": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "string"] } + } + }, + "top_group": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] } + } + }, + "updated_at": { "type": ["null", "string"], "format": "date-time" }, + "updates": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "views": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "workspace": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "kind": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json new file mode 100644 index 0000000000000..9ce9ea04479f9 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json @@ -0,0 +1,59 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "assets": { + "type": ["array", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "board": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "column_values": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] }, + "value": { "type": ["null", "object"] }, + "additional_info": { "type": ["null", "object"] }, + "text": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] } + } + }, + 
"created_at": { "type": ["null", "string"], "format": "date-time" }, + "creator_id": { "type": ["null", "integer"] }, + "group": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] } + } + }, + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "parent_item": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "state": { "type": ["null", "string"] }, + "subscribers": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "updated_at": { "type": ["null", "string"], "format": "date-time" }, + "updates": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json new file mode 100644 index 0000000000000..c409d05b6bbdb --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json @@ -0,0 +1,15 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "picture_url": { "type": ["null", "string"] }, + "users": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json new file mode 100644 index 0000000000000..11a3164a6fad0 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json @@ -0,0 +1,30 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "assets": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "body": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"], "format": "date-time" }, + "creator_id": { "type": ["null", "integer"] }, + "id": { "type": ["null", "integer"] }, + "item_id": { "type": ["null", "integer"] }, + "replies": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] }, + "creator_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"], "format": "date-time" }, + "text_body": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"], "format": "date-time" }, + "body": { "type": ["null", "string"] } + } + }, + "text_body": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"], "format": "date-time" } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json new file mode 100644 index 0000000000000..a064bdc3f4bca --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "birthday": { "type": ["null", "string"], "format": "date-time" }, + "country_code": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"], "format": "date-time" }, + "join_date": { "type": ["null", "string"], "format": "date" }, + "email": { "type": ["null", "string"] }, + "enabled": { "type": ["null", 
"boolean"] }, + "id": { "type": ["null", "integer"] }, + "is_admin": { "type": ["null", "boolean"] }, + "is_guest": { "type": ["null", "boolean"] }, + "is_pending": { "type": ["null", "boolean"] }, + "is_view_only": { "type": ["null", "boolean"] }, + "is_verified": { "type": ["null", "boolean"] }, + "location": { "type": ["null", "string"] }, + "mobile_phone": { "type": ["null", "string"] }, + "name": { "type": ["null", "string"] }, + "phone": { "type": ["null", "string"] }, + "photo_original": { "type": ["null", "string"] }, + "photo_small": { "type": ["null", "string"] }, + "photo_thumb": { "type": ["null", "string"] }, + "photo_thumb_small": { "type": ["null", "string"] }, + "photo_tiny": { "type": ["null", "string"] }, + "time_zone_identifier": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] }, + "utc_hours_diff": { "type": ["null", "integer"] } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/source.py b/airbyte-integrations/connectors/source-monday/source_monday/source.py new file mode 100644 index 0000000000000..2e0cab5d0815f --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/source.py @@ -0,0 +1,139 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import json +import os +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer + + +# Basic full refresh stream +class MondayStream(HttpStream, ABC): + url_base = "https://api.monday.com/v2" + primary_key = "id" + page = 1 + transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json().get("data", {}) + records = json_response.get(self.name.lower(), []) + self.page += 1 + if records: + return {"page": self.page} + + def load_schema(self): + """ + Load schema from file and make a GraphQL query + """ + script_dir = os.path.dirname(__file__) + schema_path = os.path.join(script_dir, f"schemas/{self.name.lower()}.json") + with open(schema_path) as f: + schema_dict = json.load(f) + schema = schema_dict["properties"] + graphql_schema = [] + for col in schema: + if "properties" in schema[col]: + nested_ids = ",".join(schema[col]["properties"]) + graphql_schema.append(f"{col}{{{nested_ids}}}") + else: + graphql_schema.append(col) + return ",".join(graphql_schema) + + def should_retry(self, response: requests.Response) -> bool: + # Monday API return code 200 with and errors key if complexity is too high. 
+ # https://api.developer.monday.com/docs/complexity-queries + is_complex_query = response.json().get("errors") + return response.status_code == 429 or 500 <= response.status_code < 600 or is_complex_query + + @property + def retry_factor(self) -> int: + return 15 + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + graphql_params = {} + if next_page_token: + graphql_params.update(next_page_token) + + graphql_query = ",".join([f"{k}:{v}" for k, v in graphql_params.items()]) + + # Monday uses a query string to pass in environments + params = {"query": f"query {{ {self.name.lower()} ({graphql_query}) {{ {self.load_schema()} }} }}"} + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json().get("data", {}) + records = json_response.get(self.name.lower(), []) + yield from records + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "" + + +class Items(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/items-queries + """ + + +class Boards(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/groups-queries#groups-queries + """ + + +class Teams(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/teams-queries + """ + + +class Updates(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/updates-queries + """ + + +class Users(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/users-queries-1 + """ + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + pass + + +# Source +class SourceMonday(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + url = "https://api.monday.com/v2" + params = {"query": "{boards(limit:1){id name}}"} + auth = TokenAuthenticator(config["api_token"]).get_auth_header() + try: + response = requests.post(url, params=params, headers=auth) + response.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = TokenAuthenticator(token=config["api_token"]) + return [ + Items(authenticator=auth), + Boards(authenticator=auth), + Teams(authenticator=auth), + Updates(authenticator=auth), + Users(authenticator=auth), + ] diff --git a/airbyte-integrations/connectors/source-monday/source_monday/spec.json b/airbyte-integrations/connectors/source-monday/source_monday/spec.json new file mode 100644 index 0000000000000..870dd5ac587d4 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/spec.json @@ -0,0 +1,17 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Monday Spec", + "type": "object", + "required": ["api_token"], + "additionalProperties": false, + "properties": { + "api_token": { + "type": "string", + "description": "This is the API token to authenticate requests to Monday. 
Profile picture (bottom left) => Admin => API", + "airbyte_secret": true + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/unit_tests/__init__.py b/airbyte-integrations/connectors/source-monday/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-monday/unit_tests/conftest.py b/airbyte-integrations/connectors/source-monday/unit_tests/conftest.py new file mode 100644 index 0000000000000..d03c2820311d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/unit_tests/conftest.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import json + +import pytest + + +@pytest.fixture(scope="session", name="config") +def config_fixture(): + with open("secrets/config.json", "r") as config_file: + return json.load(config_file) diff --git a/airbyte-integrations/connectors/source-monday/unit_tests/test_source.py b/airbyte-integrations/connectors/source-monday/unit_tests/test_source.py new file mode 100644 index 0000000000000..ed8c514aacbf1 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/unit_tests/test_source.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_monday.source import SourceMonday + + +def test_check_connection(mocker, config): + source = SourceMonday() + logger_mock = MagicMock() + assert source.check_connection(logger_mock, config) == (True, None) + + +def test_stream_count(mocker): + source = SourceMonday() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 5 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile index d25e01d0ad9f6..adb13884e8aa4 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-mssql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mssql/Dockerfile b/airbyte-integrations/connectors/source-mssql/Dockerfile index 0683c70931345..b73c16413c579 100644 --- a/airbyte-integrations/connectors/source-mssql/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.3.6 +LABEL io.airbyte.version=0.3.8 LABEL io.airbyte.name=airbyte/source-mssql diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java index ce1476668304f..50dd8429577b8 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java +++ 
b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java @@ -26,6 +26,9 @@ static Properties getDebeziumProperties() { // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-property-provide-transaction-metadata props.setProperty("provide.transaction.metadata", "false"); + props.setProperty("converters", "mssql_converter"); + props.setProperty("mssql_converter.type", "io.airbyte.integrations.debezium.internals.MSSQLConverter"); + return props; } diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile index 0710969d42385..d6929a0b4878e 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/source-mysql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json index 42e1c5104e518..d26dd1d611a6e 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json @@ -46,7 +46,7 @@ "type": "string", "title": "Replication Method", "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses the Binlog to detect inserts, updates, and deletes. 
This needs to be configured on the source database itself.", - "order": 6, + "order": 7, "default": "STANDARD", "enum": ["STANDARD", "CDC"] } diff --git a/airbyte-integrations/connectors/source-mysql/Dockerfile b/airbyte-integrations/connectors/source-mysql/Dockerfile index b223be5a9f20a..69879005089e2 100644 --- a/airbyte-integrations/connectors/source-mysql/Dockerfile +++ b/airbyte-integrations/connectors/source-mysql/Dockerfile @@ -8,6 +8,6 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.4.8 +LABEL io.airbyte.version=0.4.9 LABEL io.airbyte.name=airbyte/source-mysql diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java index 0c3827c89189a..d314fef26ab28 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java @@ -102,6 +102,15 @@ protected void initTests() { .addExpectedValues("1") .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("smallint") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .fullSourceDataType("smallint unsigned") + .addInsertValues("null", "0", "65535") + .addExpectedValues(null, "0", "65535") + .build()); + addDataTypeTestData( TestDataHolder.builder() .sourceType("mediumint") diff --git a/airbyte-integrations/connectors/source-okta/Dockerfile b/airbyte-integrations/connectors/source-okta/Dockerfile index 5dc6182789386..1c554912b8906 100644 --- a/airbyte-integrations/connectors/source-okta/Dockerfile +++ b/airbyte-integrations/connectors/source-okta/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-okta diff --git a/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml b/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml index 16df00be02454..1e60fe03ef3a0 100644 --- a/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml @@ -12,6 +12,7 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["logs"] full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json index a5fdac3293e30..7c58f625727a2 100644 --- a/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json @@ -21,6 +21,17 @@ "destination_sync_mode": "overwrite", "cursor_field": ["lastUpdated"], "primary_key": [["id"]] + }, + { + "stream": { + "name": "logs", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["published"], + "primary_key": [["uuid"]] } ] } diff --git a/airbyte-integrations/connectors/source-okta/source_okta/source.py b/airbyte-integrations/connectors/source-okta/source_okta/source.py index 2bd410c9ae604..c39afa0eac49f 100644 --- a/airbyte-integrations/connectors/source-okta/source_okta/source.py +++ b/airbyte-integrations/connectors/source-okta/source_okta/source.py @@ -90,9 +90,14 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late ) } - def request_params(self, stream_state=None, **kwargs): + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: stream_state = stream_state or {} - params = super().request_params(stream_state=stream_state, **kwargs) + params = super().request_params(stream_state, stream_slice, next_page_token) latest_entry = stream_state.get(self.cursor_field) if latest_entry: params["filter"] = f'{self.cursor_field} gt "{latest_entry}"' @@ -108,12 +113,31 @@ def path(self, **kwargs) -> str: class Logs(IncrementalOktaStream): + cursor_field = "published" primary_key = "uuid" def path(self, **kwargs) -> str: return "logs" + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: + # The log stream use a different params to get data + # https://developer.okta.com/docs/reference/api/system-log/#datetime-filter + stream_state = stream_state or {} + params = { + "limit": self.page_size, + **(next_page_token or {}), + } + latest_entry = stream_state.get(self.cursor_field) + if latest_entry: + params["since"] = latest_entry + return params + class Users(IncrementalOktaStream): cursor_field = "lastUpdated" diff --git a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile 
b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile index ae28a1f58fdb6..183fd4031a844 100644 --- a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt diff --git a/airbyte-integrations/connectors/source-postgres/Dockerfile b/airbyte-integrations/connectors/source-postgres/Dockerfile index e671b877ce94f..7702449dd508f 100644 --- a/airbyte-integrations/connectors/source-postgres/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.3.11 +LABEL io.airbyte.version=0.3.13 LABEL io.airbyte.name=airbyte/source-postgres diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java index eeb22b57b8a0c..8cda9da3468a7 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java @@ -20,6 +20,9 @@ static Properties getDebeziumProperties(final JsonNode config) { props.setProperty("publication.autocreate.mode", "disabled"); + props.setProperty("converters", "datetime"); + props.setProperty("datetime.type", "io.airbyte.integrations.debezium.internals.PostgresConverter"); + return props; } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java index c05ac40e69f5b..62124ae28e375 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java @@ -70,6 +70,7 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc .put("username", container.getUsername()) .put("password", container.getPassword()) .put("replication_method", replicationMethod) + .put("ssl", false) .build()); final Database database = Databases.createDatabase( diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java index f1bbfd347bcb7..38f6ae3e47627 100644 --- 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java @@ -17,10 +17,6 @@ import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.utility.MountableFile; -/** - * None of the tests in this class use the cdc path (run the tests and search for `using CDC: false` - * in logs). This is exact same as {@link PostgresSourceAcceptanceTest} - */ public class CdcPostgresSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { private static final String SLOT_NAME_BASE = "debezium_slot"; @@ -54,6 +50,7 @@ protected Database setupDatabase() throws Exception { .put("username", container.getUsername()) .put("password", container.getPassword()) .put("replication_method", replicationMethod) + .put("ssl", false) .build()); final Database database = Databases.createDatabase( @@ -138,25 +135,23 @@ protected void initTests() { .addExpectedValues("1", "32767", "0", "-32767") .build()); - // BUG https://github.com/airbytehq/airbyte/issues/3932 - // BIT type is currently parsed as a Boolean which is incorrect - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("bit") - // .fullSourceDataType("BIT(3)") - // .airbyteType(JsonSchemaPrimitive.NUMBER) - // .addInsertValues("B'101'") - // //.addExpectedValues("101") - // - .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bit") + .fullSourceDataType("BIT(3)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .addInsertValues("B'101'", "B'111'", "null") + .addExpectedValues("101", "111", null) + .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("bit_varying") - // .fullSourceDataType("BIT VARYING(5)") - // .airbyteType(JsonSchemaPrimitive.NUMBER) - // .addInsertValues("B'101'", "null") - // .addExpectedValues("101", null) - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bit_varying") + .fullSourceDataType("BIT VARYING(5)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .addInsertValues("B'101'", "null") + .addExpectedValues("101", null) + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -222,36 +217,28 @@ protected void initTests() { "128.1.0.0/16", "2001:4f8:3:ba::/64") .build()); - // JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" so it doesnt suppose to handle BC - // dates - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("date") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'1999-01-08'", "null") // "'199-10-10 BC'" - // .addExpectedValues("1999-01-08T00:00:00Z", null) // , "199-10-10 BC") - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("date") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'January 7, 1999'", "'1999-01-08'", "'1/9/1999'", "'January 10, 99 BC'", "'January 11, 99 AD'", "null") + .addExpectedValues("1999-01-07", "1999-01-08", "1999-01-09", "0099-01-10", "1999-01-11", null) + .build()); - // Values "'-Infinity'", "'Infinity'", "'Nan'" will not be parsed due to: - // JdbcUtils -> setJsonField contains: - // case FLOAT, DOUBLE -> o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); addDataTypeTestData( TestDataHolder.builder() .sourceType("float8") .airbyteType(JsonSchemaPrimitive.NUMBER) - 
.addInsertValues("'123'", "'1234567890.1234567'", "null") - .addExpectedValues("123.0", "1.2345678901234567E9", null) + .addInsertValues("'123'", "'1234567890.1234567'", "'-Infinity'", "'Infinity'", "'NaN'", "null") + .addExpectedValues("123.0", "1.2345678901234567E9", "-Infinity", "Infinity", "NaN", null) .build()); - // Values "'-Infinity'", "'Infinity'", "'Nan'" will not be parsed due to: - // JdbcUtils -> setJsonField contains: - // case FLOAT, DOUBLE -> o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); addDataTypeTestData( TestDataHolder.builder() .sourceType("float") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("'123'", "'1234567890.1234567'", "null") - .addExpectedValues("123.0", "1.2345678901234567E9", null) + .addInsertValues("'123'", "'1234567890.1234567'", "'-Infinity'", "'Infinity'", "'NaN'", "null") + .addExpectedValues("123.0", "1.2345678901234567E9", "-Infinity", "Infinity", "NaN", null) .build()); addDataTypeTestData( @@ -270,13 +257,15 @@ protected void initTests() { .addExpectedValues(null, "-2147483648", "2147483647") .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("interval") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("null", "'P1Y2M3DT4H5M6S'", "'-178000000'", "'178000000'") - // .addExpectedValues(null, "1 year 2 mons 3 days 04:05:06", "-49444:26:40", "49444:26:40") - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("interval") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("null", "'P1Y2M3DT4H5M6S'", "'PT4H5M6S'", "'-300'", "'-178000000'", + "'178000000'", "'1-2'", "'3 4:05:06'", "'P0002-02-03T04:05:06'") + .addExpectedValues(null, "1 year 2 mons 3 days 04:05:06", "04:05:06", "-00:05:00", "-49444:26:40", + "49444:26:40", "1 year 2 mons 00:00:00", "3 days 04:05:06", "2 year 2 mons 3 days 04:05:06") + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -313,39 +302,35 @@ protected void initTests() { "08:00:2b:01:02:03:04:07") .build()); - // The Money type fails when amount is > 1,000. 
in JdbcUtils-> rowToJson as r.getObject(i); - // Bad value for type double : 1,000.01 - // The reason is that in jdbc implementation money type is tried to get as Double (jdbc - // implementation) - // Max values for Money type: "-92233720368547758.08", "92233720368547758.07" - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("money") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("null", "'999.99'") - // .addExpectedValues(null, "999.99") - // .build()); + // Max values for Money type should be: "-92233720368547758.08", "92233720368547758.07", + // debezium return rounded value for values more than 999999999999999 and less than + // -999999999999999, + // we map these value as null; + // opened issue https://github.com/airbytehq/airbyte/issues/7338 + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("money") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("null", "'999.99'", "'1,000.01'", "'-999999999999.99'", "'-999999999999999'", "'999999999999.99'", "'999999999999999'", + "'-92233720368547758.08'", "'92233720368547758.07'") + .addExpectedValues(null, "999.99", "1000.01", "-999999999999.99", "-999999999999999", "999999999999.99", "999999999999999", + null, null) + .build()); - // The numeric type in Postres may contain 'Nan' type, but in JdbcUtils-> rowToJson - // we try to map it like this, so it fails - // case NUMERIC, DECIMAL -> o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); addDataTypeTestData( TestDataHolder.builder() .sourceType("numeric") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("'99999'", "null") - .addExpectedValues("99999", null) + .addInsertValues("'99999'", "'NAN'", null) + .addExpectedValues("99999", "NAN", null) .build()); - // The numeric type in Postres may contain 'Nan' type, but in JdbcUtils-> rowToJson - // we try to map it like this, so it fails - // case NUMERIC, DECIMAL -> o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); addDataTypeTestData( TestDataHolder.builder() .sourceType("decimal") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("99999", "5.1", "0", "null") - .addExpectedValues("99999", "5.1", "0", null) + .addInsertValues("99999", "5.1", "0", "'NAN'", "null") + .addExpectedValues("99999", "5.1", "0", "NAN", null) .build()); addDataTypeTestData( @@ -353,8 +338,8 @@ protected void initTests() { .sourceType("numeric") .fullSourceDataType("numeric(13,4)") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("0.1880", "10.0000", "5213.3468", "null") - .addExpectedValues("0.1880", "10.0000", "5213.3468", null) + .addInsertValues("0.1880", "10.0000", "5213.3468", "'NAN'", "null") + .addExpectedValues("0.1880", "10.0000", "5213.3468", "NAN", null) .build()); addDataTypeTestData( @@ -374,51 +359,45 @@ protected void initTests() { .addExpectedValues("a", "abc", "Миші йдуть;", "櫻花分店", "", null, "\\xF0\\x9F\\x9A\\x80") .build()); - // JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" for both Date and Time types. - // So Time only (04:05:06) would be represented like "1970-01-01T04:05:06Z" which is incorrect addDataTypeTestData( TestDataHolder.builder() .sourceType("time") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() + .addInsertValues("null", "'04:05:06'", "'2021-04-12 05:06:07'", "'04:05 PM'") + .addExpectedValues(null, "04:05:06", "05:06:07", "16:05:00") .build()); - // JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" for both Date and Time types. 
- // So Time only (04:05:06) would be represented like "1970-01-01T04:05:06Z" which is incorrect addDataTypeTestData( TestDataHolder.builder() .sourceType("timetz") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() - .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("timestamp") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("TIMESTAMP '2004-10-19 10:23:54'", "null") - // .addExpectedValues("2004-10-19T10:23:54Z", null) - // .build()); - - // May be run locally, but correct the timezone aacording to your location - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("timestamptz") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("TIMESTAMP '2004-10-19 10:23:54+02'", "null") - // .addExpectedValues("2004-10-19T07:23:54Z", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("tsvector") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("to_tsvector('The quick brown fox jumped over the lazy dog.')") - // .addExpectedValues( - // "'brown':3 'dog':9 'fox':4 'jumped':5 'lazy':8 'over':6 'quick':2 'the':1,7") - // .build()); + .addInsertValues("null", "'04:05:06+03'", "'2021-04-12 05:06:07+00'", "'060708-03'") + .addExpectedValues(null, "04:05:06+03", "05:06:07+00", "06:07:08-03") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("timestamp") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("TIMESTAMP '2004-10-19 10:23:54'", "null") + .addExpectedValues("2004-10-19T10:23:54Z", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("timestamptz") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+03'", "null") + .addExpectedValues("2004-10-19T07:23:54Z", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("tsvector") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("to_tsvector('The quick brown fox jumped over the lazy dog.')") + .addExpectedValues("'brown':3 'dog':9 'fox':4 'jumped':5 'lazy':8 'over':6 'quick':2 'the':1,7") + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -456,13 +435,13 @@ protected void initTests() { .addExpectedValues("[\"10000\",\"10000\",\"10000\",\"10000\"]", null) .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("inventory_item") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("ROW('fuzzy dice', 42, 1.99)", "null") - // .addExpectedValues("(\"fuzzy dice\",42,1.99)", null) - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("inventory_item") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("ROW('fuzzy dice', 42, 1.99)", "null") + .addExpectedValues("(\"fuzzy dice\",42,1.99)", null) + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -472,62 +451,62 @@ protected void initTests() { .addExpectedValues("(\"2010-01-01 14:30:00\",\"2010-01-01 15:30:00\")", null) .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("box") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") - // .addExpectedValues("(15,18),(3,7)", "(0,0),(0,0)", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("circle") - // .airbyteType(JsonSchemaPrimitive.STRING) - // 
.addInsertValues("'(5,7),10'", "'(0,0),0'", "'(-10,-4),10'", "null") - // .addExpectedValues("<(5,7),10>", "<(0,0),0>", "<(-10,-4),10>", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("line") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'{4,5,6}'", "'{0,1,0}'", "null") - // .addExpectedValues("{4,5,6}", "{0,1,0}", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("lseg") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") - // .addExpectedValues("[(3,7),(15,18)]", "[(0,0),(0,0)]", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("path") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") - // .addExpectedValues("((3,7),(15,18))", "((0,0),(0,0))", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("point") - // .airbyteType(JsonSchemaPrimitive.NUMBER) - // .addInsertValues("'(3,7)'", "'(0,0)'", "'(999999999999999999999999,0)'", "null") - // .addExpectedValues("(3,7)", "(0,0)", "(1e+24,0)", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("polygon") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", - // "'((0,0),(999999999999999999999999,0))'", "null") - // .addExpectedValues("((3,7),(15,18))", "((0,0),(0,0))", "((0,0),(1e+24,0))", null) - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("box") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") + .addExpectedValues("(15.0,18.0),(3.0,7.0)", "(0.0,0.0),(0.0,0.0)", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("circle") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'(5,7),10'", "'(0,0),0'", "'(-10,-4),10'", "null") + .addExpectedValues("<(5.0,7.0),10.0>", "<(0.0,0.0),0.0>", "<(-10.0,-4.0),10.0>", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("line") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'{4,5,6}'", "'{0,1,0}'", "null") + .addExpectedValues("{4.0,5.0,6.0}", "{0.0,1.0,0.0}", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("lseg") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") + .addExpectedValues("[(3.0,7.0),(15.0,18.0)]", "[(0.0,0.0),(0.0,0.0)]", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("path") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15.5,18.2))'", "'((0,0),(0,0))'", "null") + .addExpectedValues("((3.0,7.0),(15.5,18.2))", "((0.0,0.0),(0.0,0.0))", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("point") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .addInsertValues("'(3,7)'", "'(0,0)'", "'(999999999999999999999999,0)'", "null") + .addExpectedValues("(3.0,7.0)", "(0.0,0.0)", "(1.0E24,0.0)", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("polygon") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", + "'((0,0),(999999999999999999999999,0))'", "null") + .addExpectedValues("((3.0,7.0),(15.0,18.0))", 
"((0.0,0.0),(0.0,0.0))", "((0.0,0.0),(1.0E24,0.0))", null) + .build()); } } diff --git a/airbyte-integrations/connectors/source-recharge/Dockerfile b/airbyte-integrations/connectors/source-recharge/Dockerfile index 4292222431b21..b91a5d04fcafb 100644 --- a/airbyte-integrations/connectors/source-recharge/Dockerfile +++ b/airbyte-integrations/connectors/source-recharge/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-recharge diff --git a/airbyte-integrations/connectors/source-recharge/source_recharge/api.py b/airbyte-integrations/connectors/source-recharge/source_recharge/api.py index a7f7bcbfc4934..30e922385db03 100644 --- a/airbyte-integrations/connectors/source-recharge/source_recharge/api.py +++ b/airbyte-integrations/connectors/source-recharge/source_recharge/api.py @@ -56,6 +56,15 @@ def get_stream_data(self, response_data: Any) -> List[dict]: else: return [response_data] + def should_retry(self, response: requests.Response) -> bool: + res = super().should_retry(response) + if res: + return res + + # For some reason, successful responses contains incomplete data + content_length = int(response.headers.get("Content-Length", 0)) + return response.status_code == 200 and content_length > len(response.content) + class IncrementalRechargeStream(RechargeStream, ABC): diff --git a/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md b/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md index d08b9bdb0f83d..943fb5c4e4f93 100644 --- a/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md +++ b/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md @@ -10,6 +10,19 @@ There are two types of objects: To query an object, one must use [SOQL](https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm), Salesforce’s proprietary SQL language. An example might be `SELECT * FROM WHERE SystemModstamp > 2122-01-18T21:18:20.000Z`. +Because the `Salesforce` connector pulls all objects from `Salesforce` dynamically, then all streams are dynamically generated accordingly. +And at the stage of creating a schema for each stream, we understand whether the stream is dynamic or not (if the stream has one of the +following fields: `SystemModstamp`, `LastModifiedDate`, `CreatedDate`, `LoginTime`, then it is dynamic). +Based on this data, for streams that have information about record updates - we filter by `updated at`, and for streams that have information +only about the date of creation of the record (as in the case of streams that have only the `CreatedDate` field) - we filter by `created at`. +And we assign the Cursor as follows: +``` +@property +def cursor_field(self) -> str: + return self.replication_key +``` +`replication_key` is one of the following values: `SystemModstamp`, `LastModifiedDate`, `CreatedDate`, `LoginTime`. + In addition there are two types of APIs exposed by Salesforce: * **[REST API](https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_queryall.htm)**: completely synchronous * **[BULK API](https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/queries.htm)**: has larger rate limit allowance (150k objects per day on the standard plan) but is asynchronous and therefore follows a request-poll-wait pattern. 
diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile index 4779533f45117..47ba807177208 100644 --- a/airbyte-integrations/connectors/source-salesforce/Dockerfile +++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json index 69da9893a876b..0088a9218122b 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json @@ -80,30 +80,6 @@ "sync_mode": "incremental", "destination_sync_mode": "append" }, - { - "stream": { - "name": "LoginGeo", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "LoginHistory", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["LoginTime"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, { "stream": { "name": "PermissionSetTabSetting", diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json index bdf4425c618f5..c1d410e37bf9c 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json @@ -70,30 +70,6 @@ "sync_mode": "incremental", "destination_sync_mode": "append" }, - { - "stream": { - "name": "LoginGeo", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "LoginHistory", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["LoginTime"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, { "stream": { "name": "PermissionSetTabSetting", diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json index a676b2a0674b6..a167a53fc2c4d 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json @@ -28,7 +28,7 @@ "airbyte_secret": true }, "start_date": { - "description": "UTC date and time in the format 2017-01-25T00:00:00Z. 
Any data before this date will not be replicated.", + "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated. This field uses the \"updated\" field if available, otherwise the \"created\" fields if they are available for a stream.", "type": "string", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "examples": ["2021-07-25T00:00:00Z"] diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index b668bf0c965f4..3c5d44d7e115e 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -63,7 +63,7 @@ def request_params( selected_properties = { key: value for key, value in selected_properties.items() - if not (("format" in value and value["format"] == "base64") or "object" in value["type"]) + if not (("format" in value and value["format"] == "base64") or ("object" in value["type"] and len(value["type"]) < 3)) } query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " @@ -179,13 +179,7 @@ def transform_types(field_types: list = None): """ Convert Jsonschema data types to Python data types. """ - convert_types_map = { - "boolean": bool, - "string": str, - "number": float, - "integer": int, - "object": dict, - } + convert_types_map = {"boolean": bool, "string": str, "number": float, "integer": int, "object": dict, "array": list} return [convert_types_map[field_type] for field_type in field_types if field_type != "null"] for key, value in record.items(): @@ -279,7 +273,7 @@ def request_params( selected_properties = { key: value for key, value in selected_properties.items() - if not (("format" in value and value["format"] == "base64") or "object" in value["type"]) + if not (("format" in value and value["format"] == "base64") or ("object" in value["type"] and len(value["type"]) < 3)) } stream_date = stream_state.get(self.cursor_field) diff --git a/airbyte-integrations/connectors/source-sentry/.dockerignore b/airbyte-integrations/connectors/source-sentry/.dockerignore new file mode 100644 index 0000000000000..a5ed66c554120 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_sentry +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-sentry/.gitignore b/airbyte-integrations/connectors/source-sentry/.gitignore new file mode 100644 index 0000000000000..d6e830be95797 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/.gitignore @@ -0,0 +1 @@ +.python-version \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-sentry/Dockerfile b/airbyte-integrations/connectors/source-sentry/Dockerfile new file mode 100644 index 0000000000000..f743e7e20a611 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . 
+ +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_sentry ./source_sentry + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-sentry diff --git a/airbyte-integrations/connectors/source-sentry/README.md b/airbyte-integrations/connectors/source-sentry/README.md new file mode 100644 index 0000000000000..bb0502804de66 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/README.md @@ -0,0 +1,132 @@ +# Sentry Source + +This is the repository for the Sentry source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/sentry). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/sentry) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_sentry/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source sentry test creds` +and place them into `secrets/config.json`. 
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-sentry:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-sentry:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-sentry:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-sentry:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-sentry:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside the `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies into two groups: +* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list.
+* dependencies required for testing go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml b/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml new file mode 100644 index 0000000000000..a36b40014c1b2 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml @@ -0,0 +1,18 @@ +connector_image: airbyte/source-sentry:dev +tests: + spec: + - spec_path: "source_sentry/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-sentry/bootstrap.md b/airbyte-integrations/connectors/source-sentry/bootstrap.md new file mode 100644 index 0000000000000..7f1e939a53901 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/bootstrap.md @@ -0,0 +1,16 @@ +## Streams + +Sentry provides a REST API. The connector implements the following streams, all of which support full refresh only. + +* [Events](https://docs.sentry.io/api/events/list-a-projects-events/) +* [Issues](https://docs.sentry.io/api/events/list-a-projects-issues/) + +A [ProjectDetail](https://docs.sentry.io/api/projects/retrieve-a-project/) stream is also implemented, used only for connection checking. + +## Authentication + +The Sentry API offers three types of [authentication methods](https://docs.sentry.io/api/auth/). + +* Auth Token - The most common authentication method in Sentry, and the only one supported by this connector. +* DSN Authentication - Only some API endpoints support this method. Not supported by this connector. +* API Keys - Keys are passed using HTTP Basic auth and are a legacy means of authenticating. They are still supported but disabled for new accounts. Not supported by this connector.
\ No newline at end of file diff --git a/airbyte-integrations/connectors/source-sentry/build.gradle b/airbyte-integrations/connectors/source-sentry/build.gradle new file mode 100644 index 0000000000000..ba18928d4c3b1 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_sentry' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py b/airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py new file mode 100644 index 0000000000000..108075487440f --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..ed38985229c58 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json @@ -0,0 +1,40 @@ +{ + "streams": [ + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "events", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + }, + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "issues", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + }, + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "project_detail", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + }, + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "projects", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + } + ] +} diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..7a94e39068564 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json @@ -0,0 +1,6 @@ +{ + "auth_token": "invalid-token", + "hostname": "sentry.io", + "organization": "invalid-organization", + "project": "invalid-project" +} diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json 
b/airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json new file mode 100644 index 0000000000000..f0e080f4be8f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json @@ -0,0 +1,6 @@ +{ + "auth_token": "token", + "hostname": "sentry.io", + "organization": "organization", + "project": "project" +} diff --git a/airbyte-integrations/connectors/source-sentry/main.py b/airbyte-integrations/connectors/source-sentry/main.py new file mode 100644 index 0000000000000..0bfb3a278aab4 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_sentry import SourceSentry + +if __name__ == "__main__": + source = SourceSentry() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-sentry/requirements.txt b/airbyte-integrations/connectors/source-sentry/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-sentry/setup.py b/airbyte-integrations/connectors/source-sentry/setup.py new file mode 100644 index 0000000000000..ddf6245c4d992 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_sentry", + description="Source implementation for Sentry.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py b/airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py new file mode 100644 index 0000000000000..3435dba8e0726 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceSentry + +__all__ = ["SourceSentry"] diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json new file mode 100644 index 0000000000000..59345c95f7d4e --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json @@ -0,0 +1,72 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "eventID": { + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "object", + "properties": { + "value": { + "type": "string" + }, + "key": { + "type": "string" + } + } + } + }, + "dateCreated": { + "type": "string" + }, + "user": { + "type": ["null", "object"], + "properties": { + "username": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "ip_address": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "data": { + "type": ["null", "object"], + "properties": { + "isStaff": { + "type": "boolean" + } + } + }, + "id": { + "type": "string" + } + } + }, + "message": { + "type": "string" + }, + "id": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "event.type": { + "type": "string" + }, + "groupID": { + "type": "string" + }, + "title": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json new file mode 100644 index 0000000000000..d4814ea21498b --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json @@ -0,0 +1,133 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "annotations": { + "type": "array", + "items": { + "type": "string" + } + }, + "assignedTo": { + "type": ["null", "object"] + }, + "count": { + "type": "string" + }, + "culprit": { + "type": "string" + }, + "firstSeen": { + "type": "string" + }, + "hasSeen": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isBookmarked": { + "type": "boolean" + }, + "isPublic": { + "type": "boolean" + }, + "isSubscribed": { + "type": "boolean" + }, + "lastSeen": { + "type": "string" + }, + "level": { + "type": "string" + }, + "logger": { + "type": ["null", "string"] + }, + "metadata": { + "anyOf": [ + { + "type": "object", + "properties": { + "title": { + "type": "string" + } + } + }, + { + "type": "object", + "properties": { + "filename": { + "type": "string" + }, + "type": { + "type": "string" + }, + "value": { + "type": "string" + } + } + } + ] + }, + "numComments": { + "type": "integer" + }, + "permalink": { + "type": "string" + }, + "project": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + }, + "shareId": { + "type": ["null", "string"] + }, + "shortId": { + "type": "string" + }, + "stats": { + "type": "object", + "properties": { + "24h": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + }, + "status": { + "type": "string", + "enum": ["resolved", "unresolved", "ignored"] + }, + "statusDetails": { + "type": "object" + }, + "subscriptionDetails": { + "type": ["null", "object"] + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "userCount": { + "type": "integer" + } + } +} diff --git 
a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json new file mode 100644 index 0000000000000..efb12e70ffbc8 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json @@ -0,0 +1,402 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "allowedDomains": { + "type": "array", + "items": { + "type": "string" + } + }, + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "color": { + "type": "string" + }, + "dataScrubber": { + "type": "boolean" + }, + "dataScrubberDefaults": { + "type": "boolean" + }, + "dateCreated": { + "type": "string" + }, + "defaultEnvironment": { + "type": ["null", "string"] + }, + "digestsMaxDelay": { + "type": "integer" + }, + "digestsMinDelay": { + "type": "integer" + }, + "features": { + "type": "array", + "items": { + "type": "string" + } + }, + "firstEvent": { + "type": ["null", "string"] + }, + "hasAccess": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isBookmarked": { + "type": "boolean" + }, + "isInternal": { + "type": "boolean" + }, + "isMember": { + "type": "boolean" + }, + "isPublic": { + "type": "boolean" + }, + "latestRelease": { + "type": ["null", "object"], + "properties": { + "authors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "email": { + "type": "string" + } + } + } + }, + "commitCount": { + "type": "integer" + }, + "data": { + "type": "object" + }, + "dateCreated": { + "type": "string" + }, + "dateReleased": { + "type": ["null", "string"] + }, + "deployCount": { + "type": "integer" + }, + "firstEvent": { + "type": ["null", "string"] + }, + "lastCommit": { + "type": ["null", "object"] + }, + "lastDeploy": { + "type": ["null", "object"] + }, + "lastEvent": { + "type": ["null", "string"] + }, + "newGroups": { + "type": "integer" + }, + "owner": { + "type": ["null", "string"] + }, + "projects": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + } + }, + "ref": { + "type": ["null", "string"] + }, + "shortVersion": { + "type": "string" + }, + "url": { + "type": ["null", "string"] + }, + "version": { + "type": "string" + } + } + }, + "name": { + "type": "string" + }, + "options": { + "type": "object", + "properties": { + "feedback:branding": { + "type": "boolean" + }, + "filters:blacklisted_ips": { + "type": "string" + }, + "filters:error_messages": { + "type": "string" + }, + "filters:releases": { + "type": "string" + }, + "sentry:csp_ignored_sources": { + "type": "string" + }, + "sentry:csp_ignored_sources_defaults": { + "type": "boolean" + }, + "sentry:reprocessing_active": { + "type": "boolean" + } + } + }, + "organization": { + "type": "object", + "properties": { + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "id": { + "type": "string" + }, + "isEarlyAdopter": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "require2FA": { + "type": "boolean" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "object", + "properties": { + "id": { + "type": 
"string" + }, + "name": { + "type": "string" + } + } + } + } + }, + "platform": { + "type": ["null", "string"] + }, + "platforms": { + "type": "array", + "items": { + "type": "string" + } + }, + "plugins": { + "type": "array", + "items": { + "type": "object", + "properties": { + "assets": { + "type": "array", + "items": { + "type": "string" + } + }, + "author": { + "type": ["null", "object"], + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "canDisable": { + "type": "boolean" + }, + "contexts": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "type": "string" + }, + "doc": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "hasConfiguration": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isTestable": { + "type": "boolean" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "resourceLinks": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + } + }, + "shortName": { + "type": "string" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string" + }, + "version": { + "type": ["null", "string"] + } + } + } + }, + "processingIssues": { + "type": "integer" + }, + "relayPiiConfig": { + "type": ["null", "string"] + }, + "resolveAge": { + "type": "integer" + }, + "safeFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "scrapeJavaScript": { + "type": "boolean" + }, + "scrubIPAddresses": { + "type": "boolean" + }, + "securityToken": { + "type": "string" + }, + "securityTokenHeader": { + "type": ["null", "string"] + }, + "sensitiveFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string" + }, + "storeCrashReports": { + "type": ["null", "boolean"] + }, + "subjectPrefix": { + "type": "string" + }, + "subjectTemplate": { + "type": "string" + }, + "team": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + }, + "teams": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + } + }, + "verifySSL": { + "type": "boolean" + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json new file mode 100644 index 0000000000000..3656b0b27c2a9 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json @@ -0,0 +1,119 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "color": { + "type": "string" + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "features": { + "type": "array", + "items": { + "type": "string" + } + }, + "firstEvent": { + "type": ["null", "string"] + }, + "hasAccess": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isBookmarked": { + "type": "boolean" + }, + "isInternal": { + "type": "boolean" + }, + "isMember": { + "type": "boolean" + }, + "isPublic": { + "type": 
"boolean" + }, + "name": { + "type": "string" + }, + "organization": { + "type": "object", + "properties": { + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "id": { + "type": "string" + }, + "isEarlyAdopter": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "require2FA": { + "type": "boolean" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "requireEmailVerification": { + "type": "boolean" + }, + "features": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "platform": { + "type": ["null", "string"] + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string", + "enum": ["active", "disabled", "pending_deletion", "deletion_in_progress"] + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/source.py b/airbyte-integrations/connectors/source-sentry/source_sentry/source.py new file mode 100644 index 0000000000000..9eb192da02627 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/source.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from typing import Any, List, Mapping, Tuple + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator + +from .streams import Events, Issues, ProjectDetail, Projects + + +# Source +class SourceSentry(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, Any]: + try: + projects_stream = Projects( + authenticator=TokenAuthenticator(token=config["auth_token"]), + hostname=config.get("hostname"), + ) + next(projects_stream.read_records(sync_mode=SyncMode.full_refresh)) + return True, None + except Exception as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + stream_args = { + "authenticator": TokenAuthenticator(token=config["auth_token"]), + "hostname": config.get("hostname"), + } + project_stream_args = { + **stream_args, + "organization": config["organization"], + "project": config["project"], + } + return [ + Events(**project_stream_args), + Issues(**project_stream_args), + ProjectDetail(**project_stream_args), + Projects(**stream_args), + ] diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/spec.json b/airbyte-integrations/connectors/source-sentry/source_sentry/spec.json new file mode 100644 index 0000000000000..fbfb01e2fcf53 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/spec.json @@ -0,0 +1,34 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/sentry", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Sentry Spec", + "type": "object", + "required": ["auth_token", "organization", "project"], + "additionalProperties": false, + "properties": { + "auth_token": { + "type": "string", + "title": "Authentication tokens", + "description": "Log into Sentry and then create authentication tokens.For self-hosted, you can find or create authentication tokens by visiting \"{instance_url_prefix}/settings/account/api/auth-tokens/\"", + "airbyte_secret": true + }, + "hostname": { + "type": "string", + 
"title": "Host Name", + "description": "Host name of Sentry API server.For self-hosted, specify your host name here. Otherwise, leave it empty.", + "default": "sentry.io" + }, + "organization": { + "type": "string", + "title": "Organization", + "description": "The slug of the organization the groups belong to." + }, + "project": { + "type": "string", + "title": "Project", + "description": "The slug of the project the groups belong to." + } + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py b/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py new file mode 100644 index 0000000000000..66ab436156f8a --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py @@ -0,0 +1,158 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from typing import Any, Iterable, Mapping, MutableMapping, Optional + +import requests +from airbyte_cdk.sources.streams.http import HttpStream + + +class SentryStream(HttpStream, ABC): + API_VERSION = "0" + URL_TEMPLATE = "https://{hostname}/api/{api_version}/" + primary_key = "id" + + def __init__(self, hostname: str, **kwargs): + super().__init__(**kwargs) + self._url_base = self.URL_TEMPLATE.format(hostname=hostname, api_version=self.API_VERSION) + + @property + def url_base(self) -> str: + return self._url_base + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + return None + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + return {} + + +class SentryStreamPagination(SentryStream): + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """ + Expect the link header field to always contain the values ​​for `rel`, `results`, and `cursor`. + If there is actually the next page, rel="next"; results="true"; cursor="". 
+ """ + if response.links["next"]["results"] == "true": + return {"cursor": response.links["next"]["cursor"]} + else: + return None + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + if next_page_token: + params.update(next_page_token) + + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + yield from response.json() + + +class Events(SentryStreamPagination): + """ + Docs: https://docs.sentry.io/api/events/list-a-projects-events/ + """ + + def __init__(self, organization: str, project: str, **kwargs): + super().__init__(**kwargs) + self._organization = organization + self._project = project + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return f"projects/{self._organization}/{self._project}/events/" + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + params.update({"full": "true"}) + + return params + + +class Issues(SentryStreamPagination): + """ + Docs: https://docs.sentry.io/api/events/list-a-projects-issues/ + """ + + def __init__(self, organization: str, project: str, **kwargs): + super().__init__(**kwargs) + self._organization = organization + self._project = project + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return f"projects/{self._organization}/{self._project}/issues/" + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + params.update({"statsPeriod": "", "query": ""}) + + return params + + +class Projects(SentryStreamPagination): + """ + Docs: https://docs.sentry.io/api/projects/list-your-projects/ + """ + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return "projects/" + + +class ProjectDetail(SentryStream): + """ + Docs: https://docs.sentry.io/api/projects/retrieve-a-project/ + """ + + def __init__(self, organization: str, project: str, **kwargs): + super().__init__(**kwargs) + self._organization = organization + self._project = project + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return f"projects/{self._organization}/{self._project}/" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + yield response.json() diff --git a/airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py b/airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py new file mode 100644 index 
0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py b/airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py new file mode 100644 index 0000000000000..03cc8a1144f24 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py @@ -0,0 +1,26 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_sentry.source import SourceSentry +from source_sentry.streams import Projects + + +def test_check_connection(mocker): + source = SourceSentry() + logger_mock, config_mock = MagicMock(), MagicMock() + mocker.patch.object(Projects, "read_records", return_value=iter([{"id": "1", "name": "test"}])) + assert source.check_connection(logger_mock, config_mock) == (True, None) + + +def test_streams(mocker): + source = SourceSentry() + config_mock = MagicMock() + config_mock["auth_token"] = "test-token" + config_mock["organization"] = "test-organization" + config_mock["project"] = "test-project" + streams = source.streams(config_mock) + expected_streams_number = 4 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py new file mode 100644 index 0000000000000..2417953ac0400 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py @@ -0,0 +1,110 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +import pytest +from source_sentry.streams import Events, Issues, ProjectDetail, Projects, SentryStreamPagination + +INIT_ARGS = {"hostname": "sentry.io", "organization": "test-org", "project": "test-project"} + + +@pytest.fixture +def patch_base_class(mocker): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(SentryStreamPagination, "path", "test_endpoint") + mocker.patch.object(SentryStreamPagination, "__abstractmethods__", set()) + + +def test_next_page_token(patch_base_class): + stream = SentryStreamPagination(hostname="sentry.io") + resp = MagicMock() + cursor = "next_page_num" + resp.links = {"next": {"results": "true", "cursor": cursor}} + inputs = {"response": resp} + expected_token = {"cursor": cursor} + assert stream.next_page_token(**inputs) == expected_token + + +def test_next_page_token_is_none(patch_base_class): + stream = SentryStreamPagination(hostname="sentry.io") + resp = MagicMock() + resp.links = {"next": {"results": "false", "cursor": "no_next"}} + inputs = {"response": resp} + expected_token = None + assert stream.next_page_token(**inputs) == expected_token + + +def next_page_token_inputs(): + links_headers = [ + {}, + {"next": {}}, + ] + responses = [MagicMock() for _ in links_headers] + for mock, header in zip(responses, links_headers): + mock.links = header + + return responses + + +@pytest.mark.parametrize("response", next_page_token_inputs()) +def test_next_page_token_raises(patch_base_class, response): + stream = SentryStreamPagination(hostname="sentry.io") + inputs = {"response": response} + with pytest.raises(KeyError): + stream.next_page_token(**inputs) + + +def test_events_path(): + stream = Events(**INIT_ARGS) + expected = "projects/test-org/test-project/events/" + 
assert stream.path() == expected + + +def test_issues_path(): + stream = Issues(**INIT_ARGS) + expected = "projects/test-org/test-project/issues/" + assert stream.path() == expected + + +def test_projects_path(): + stream = Projects(hostname="sentry.io") + expected = "projects/" + assert stream.path() == expected + + +def test_project_detail_path(): + stream = ProjectDetail(**INIT_ARGS) + expected = "projects/test-org/test-project/" + assert stream.path() == expected + + +def test_sentry_stream_pagination_request_params(patch_base_class): + stream = SentryStreamPagination(hostname="sentry.io") + expected = {"cursor": "next-page"} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_events_request_params(): + stream = Events(**INIT_ARGS) + expected = {"cursor": "next-page", "full": "true"} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_issues_request_params(): + stream = Issues(**INIT_ARGS) + expected = {"cursor": "next-page", "statsPeriod": "", "query": ""} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_projects_request_params(): + stream = Projects(hostname="sentry.io") + expected = {"cursor": "next-page"} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_project_detail_request_params(): + stream = ProjectDetail(**INIT_ARGS) + expected = {} + assert stream.request_params(stream_state=None, next_page_token=None) == expected diff --git a/airbyte-integrations/connectors/source-shopify/Dockerfile b/airbyte-integrations/connectors/source-shopify/Dockerfile index b20a97b483400..ca1eaaccfbaf7 100644 --- a/airbyte-integrations/connectors/source-shopify/Dockerfile +++ b/airbyte-integrations/connectors/source-shopify/Dockerfile @@ -28,5 +28,5 @@ COPY source_shopify ./source_shopify ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.22 LABEL io.airbyte.name=airbyte/source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json index a103ca3dd890b..d4bcc46c5271f 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json @@ -46,5 +46,11 @@ }, "inventory_levels": { "updated_at": "2024-07-08T05:40:38-07:00" + }, + "fulfillment_orders": { + "id": 9991307599038 + }, + "fulfillments": { + "updated_at": "2024-07-08T05:40:38-07:00" } } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json index c9ebbd2ca0db9..260bbe53a1a6b 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json @@ -191,6 +191,30 @@ "sync_mode": "incremental", "cursor_field": ["updated_at"], "destination_sync_mode": "append" + }, + { + "stream": { + "name": "fulfillment_orders", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + 
"default_cursor_field": ["id"] + }, + "sync_mode": "incremental", + "cursor_field": ["id"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "fulfillments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updated_at"] + }, + "sync_mode": "incremental", + "cursor_field": ["updated_at"], + "destination_sync_mode": "append" } ] } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json index 65ca1ccdfed26..03f98c522caa5 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json @@ -46,5 +46,11 @@ }, "inventory_levels": { "updated_at": "2021-09-10T06:48:10-07:00" + }, + "fulfillment_orders": { + "id": 123 + }, + "fulfillments": { + "updated_at": "2021-09-10T06:48:10-07:00" } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json new file mode 100644 index 0000000000000..16987cbedd703 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json @@ -0,0 +1,180 @@ +{ + "type": "object", + "properties": { + "assigned_location_id": { + "type": ["null", "integer"] + }, + "destination": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "address1": { + "type": ["null", "string"] + }, + "address2": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "company": { + "type": ["null", "string"] + }, + "country": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "first_name": { + "type": ["null", "string"] + }, + "last_name": { + "type": ["null", "string"] + }, + "phone": { + "type": ["null", "string"] + }, + "province": { + "type": ["null", "string"] + }, + "zip": { + "type": ["null", "string"] + } + } + }, + "delivery_method": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "method_type": { + "type": ["null", "string"] + } + } + }, + "fulfilled_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "fulfillment_holds": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "reason": { + "type": ["null", "string"] + }, + "reason_notes": { + "type": ["null", "string"] + } + } + } + }, + "id": { + "type": ["null", "integer"] + }, + "line_items": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "shop_id": { + "type": ["null", "integer"] + }, + "fullfillment_order_id": { + "type": ["null", "integer"] + }, + "line_item_id": { + "type": ["null", "integer"] + }, + "inventory_item_id": { + "type": ["null", "integer"] + }, + "quantity": { + "type": ["null", "integer"] + }, + "fulfillable_quantity": { + "type": ["null", "integer"] + }, + "variant_id": { + "type": ["null", "integer"] + } + } + } + }, + "order_id": { + "type": ["null", "integer"] + }, + "request_status": { + "type": ["null", "string"] + }, + "shop_id": { + "type": ["null", "integer"] + }, + "status": { + "type": ["null", "string"] + }, + "supported_actions": { + "type": ["null", "array"], + "items": { + "type": ["null", 
"string"] + } + }, + "merchant_requests": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "message": { + "type": ["null", "string"] + }, + "kind": { + "type": ["null", "string"] + }, + "request_options": { + "type": ["null", "object"] + } + } + } + }, + "assigned_location": { + "type": ["null", "object"], + "properties": { + "address1": { + "type": ["null", "string"] + }, + "address2": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "country_code": { + "type": ["null", "string"] + }, + "location_id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "phone": { + "type": ["null", "string"] + }, + "province": { + "type": ["null", "string"] + }, + "zip": { + "type": ["null", "string"] + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json new file mode 100644 index 0000000000000..05cdc8b4be903 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json @@ -0,0 +1,269 @@ +{ + "type": "object", + "properties": { + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "id": { + "type": ["null", "integer"] + }, + "location_id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "notify_customer": { + "type": ["null", "boolean"] + }, + "order_id": { + "type": ["null", "integer"] + }, + "origin_address": { + "type": ["null", "object"], + "properties": { + "address1": { + "type": "string" + }, + "address2": { + "type": "string" + }, + "city": { + "type": "string" + }, + "country_code": { + "type": "string" + }, + "province_code": { + "type": "string" + }, + "zip": { + "type": "string" + } + } + }, + "receipt": { + "type": ["null", "object"], + "properties": { + "testcase": { + "type": ["null", "boolean"] + }, + "authorization": { + "type": ["null", "string"] + } + } + }, + "service": { + "type": ["null", "string"] + }, + "shipment_status": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "tracking_company": { + "type": ["null", "string"] + }, + "tracking_numbers": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "tracking_urls": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "variant_inventory_management": { + "type": ["null", "string"] + }, + "line_items": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "variant_id": { + "type": ["null", "integer"] + }, + "title": { + "type": ["null", "string"] + }, + "quantity": { + "type": ["null", "integer"] + }, + "price": { + "type": ["null", "string"] + }, + "grams": { + "type": ["null", "number"] + }, + "sku": { + "type": ["null", "string"] + }, + "variant_title": { + "type": ["null", "string"] + }, + "vendor": { + "type": ["null", "string"] + }, + "fulfillment_service": { + "type": ["null", "string"] + }, + "product_id": { + "type": ["null", "integer"] + }, + "requires_shipping": { + "type": ["null", "boolean"] + }, + "taxable": { + "type": ["null", "boolean"] + }, + "gift_card": { + "type": ["null", "boolean"] + }, + "name": { + "type": ["null", "string"] + }, + "variant_inventory_management": { + "type": ["null", 
"string"] + }, + "properties": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "product_exists": { + "type": ["null", "boolean"] + }, + "fulfillable_quantity": { + "type": ["null", "integer"] + }, + "total_discount": { + "type": ["null", "string"] + }, + "fulfillment_status": { + "type": ["null", "string"] + }, + "fulfillment_line_item_id": { + "type": ["null", "integer"] + }, + "tax_lines": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "price": { + "type": ["null", "number"] + }, + "rate": { + "type": ["null", "number"] + }, + "title": { + "type": ["null", "string"] + } + } + } + } + } + } + }, + "duties": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "harmonized_system_code": { + "type": ["null", "string"] + }, + "country_code_of_origin": { + "type": ["null", "string"] + }, + "shop_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + }, + "presentment_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + }, + "tax_lines": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "title": { + "type": ["null", "string"] + }, + "price": { + "type": ["null", "string"] + }, + "rate": { + "type": ["null", "number"] + }, + "price_set": { + "type": ["null", "object"], + "properties": { + "shop_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + }, + "presentment_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + } + } + }, + "channel_liable": { + "type": ["null", "boolean"] + } + } + } + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py index 17e348790ec6a..e011b74dc3567 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py @@ -365,6 +365,35 @@ def generate_key(record): ) +class FulfillmentOrders(ChildSubstream): + + parent_stream_class: object = Orders + slice_key = "order_id" + + data_field = "fulfillment_orders" + + cursor_field = "id" + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + order_id = stream_slice[self.slice_key] + return f"orders/{order_id}/{self.data_field}.json" + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + return {self.cursor_field: max(latest_record.get(self.cursor_field, 0), current_stream_state.get(self.cursor_field, 0))} + + +class Fulfillments(ChildSubstream): + + parent_stream_class: object = Orders + slice_key = "order_id" + + data_field = "fulfillments" + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + order_id = stream_slice[self.slice_key] + return f"orders/{order_id}/{self.data_field}.json" + + class SourceShopify(AbstractSource): def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]: @@ 
-407,4 +436,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: DiscountCodes(config), Locations(config), InventoryLevels(config), + FulfillmentOrders(config), + Fulfillments(config), ] diff --git a/airbyte-integrations/connectors/source-stripe/Dockerfile b/airbyte-integrations/connectors/source-stripe/Dockerfile index b6467d211d351..c9d18d752c733 100644 --- a/airbyte-integrations/connectors/source-stripe/Dockerfile +++ b/airbyte-integrations/connectors/source-stripe/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.22 LABEL io.airbyte.name=airbyte/source-stripe diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json index 1703284cdb4de..309cd7f38fe47 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json @@ -13,5 +13,6 @@ "disputes": { "created": 161099630500 }, "products": { "created": 158551134100 }, "refunds": { "created": 161959562900 }, - "payment_intents": { "created": 161959562900 } + "payment_intents": { "created": 161959562900 }, + "promotion_codes": { "created": 163534157100 } } diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json index bbe8a73f9fb2d..5043eebc4b4e6 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json @@ -36,6 +36,26 @@ "destination_sync_mode": "overwrite", "cursor_field": ["created"] }, + { + "stream": { + "name": "checkout_sessions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "checkout_sessions_line_items", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "coupons", @@ -186,6 +206,19 @@ "destination_sync_mode": "overwrite", "cursor_field": ["created"] }, + { + "stream": { + "name": "promotion_codes", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["created"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["created"] + }, { "stream": { "name": "refunds", diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json index e820bbce21c99..7c13c3b49d540 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json @@ -18,6 +18,26 @@ }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": 
"checkout_sessions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "checkout_sessions_line_items", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" } ] } diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json index 020378c924509..1467d1f2242ed 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json @@ -104,6 +104,19 @@ "destination_sync_mode": "overwrite", "cursor_field": ["created"] }, + { + "stream": { + "name": "promotion_codes", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["created"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["created"] + }, { "stream": { "name": "refunds", diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json new file mode 100644 index 0000000000000..18e90e928a952 --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json @@ -0,0 +1,226 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "after_expiration": { + "type": ["null", "object"], + "properties": { + "recovery": { + "type": ["null", "object"], + "properties": { + "allow_promotion_codes": { "type": ["null", "boolean"] }, + "enabled": { "type": ["null", "boolean"] }, + "expires_at": { "type": ["null", "integer"] }, + "url": { "type": ["null", "string"] } + } + } + } + }, + "allow_promotion_codes": { "type": ["null", "boolean"] }, + "amount_subtotal": { "type": ["null", "integer"] }, + "amount_total": { "type": ["null", "integer"] }, + "automatic_tax": { + "type": ["null", "object"], + "properties": { + "enabled": { "type": ["null", "boolean"] }, + "status": { "type": ["null", "string"] } + } + }, + "billing_address_collection": { "type": ["null", "string"] }, + "cancel_url": { "type": ["null", "string"] }, + "client_reference_id": { "type": ["null", "string"] }, + "consent": { + "type": ["null", "object"], + "properties": { + "promotions": { "type": ["null", "string"] } + } + }, + "consent_collection": { + "type": ["null", "object"], + "properties": { + "promotions": { "type": ["null", "string"] } + } + }, + "currency": { "type": ["null", "string"] }, + "customer": { "type": ["null", "string"] }, + "customer_details": { + "type": ["null", "object"], + "properties": { + "email": { "type": ["null", "string"] }, + "phone": { "type": ["null", "string"] }, + "tax_exempt": { "type": ["null", "string"] }, + "tax_ids": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "type": { "type": ["null", "string"] }, + "value": { "type": ["null", "string"] } 
+ } + } + } + } + }, + "customer_email": { "type": ["null", "string"] }, + "expires_at": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "locale": { "type": ["null", "string"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "mode": { "type": ["null", "string"] }, + "payment_intent": { "type": ["null", "string"] }, + "payment_method_options": { + "type": ["null", "object"], + "properties": { + "acss_debit": { + "type": ["null", "object"], + "properties": { + "currency": { "type": ["null", "string"] }, + "mandate_options": { + "type": ["null", "object"], + "properties": { + "custom_mandate_url": { "type": ["null", "string"] }, + "default_for": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "interval_description": { "type": ["null", "string"] }, + "payment_schedule": { "type": ["null", "string"] }, + "transaction_type": { "type": ["null", "string"] } + } + }, + "verification_method": { "type": ["null", "string"] } + } + }, + "boleto": { + "type": ["null", "object"], + "properties": { + "expires_after_days": { "type": ["null", "integer"] } + } + }, + "oxxo": { + "type": ["null", "object"], + "properties": { + "expires_after_days": { "type": ["null", "integer"] } + } + } + } + }, + "payment_method_types": { + "type": ["null", "array"], + "items": { + "card": { "type": ["null", "string"] } + } + }, + "payment_status": { "type": ["null", "string"] }, + "phone_number_collection": { + "type": ["null", "object"], + "properties": { + "enabled": { "type": ["null", "boolean"] } + } + }, + "recovered_from": { "type": ["null", "string"] }, + "setup_intent": { "type": ["null", "string"] }, + "shipping": { + "type": ["null", "object"], + "properties": { + "address": { + "type": ["null", "object"], + "properties": { + "city": { "type": ["null", "string"] }, + "country": { "type": ["null", "string"] }, + "line1": { "type": ["null", "string"] }, + "line2": { "type": ["null", "string"] }, + "postal_code": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] } + } + }, + "name": { "type": ["null", "string"] } + } + }, + "shipping_address_collection": { + "type": ["null", "object"], + "properties": { + "allowed_countries": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } + }, + "submit_type": { "type": ["null", "string"] }, + "subscription": { "type": ["null", "string"] }, + "success_url": { "type": ["null", "string"] }, + "tax_id_collection": { + "type": ["null", "object"], + "properties": { + "enabled": { "type": ["null", "boolean"] } + } + }, + "total_details": { + "type": ["null", "object"], + "properties": { + "amount_discount": { "type": ["null", "integer"] }, + "amount_shipping": { "type": ["null", "integer"] }, + "amount_tax": { "type": ["null", "integer"] }, + "breakdown": { + "type": ["null", "object"], + "properties": { + "discounts": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "type": ["null", "integer"] }, + "discount": { + "type": ["null", "object"], + "properties": {} + } + } + } + }, + "taxes": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "type": ["null", "integer"] }, + "rate": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "country": { "type": ["null", "string"] }, + "created": { "type": 
["null", "integer"] }, + "description": { "type": ["null", "string"] }, + "display_name": { "type": ["null", "string"] }, + "inclusive": { "type": ["null", "boolean"] }, + "jurisdiction": { "type": ["null", "string"] }, + "livemode": { "type": ["null", "boolean"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "percentage": { "type": ["null", "number"] }, + "state": { "type": ["null", "string"] }, + "tax_type": { "type": ["null", "string"] } + } + } + } + } + } + } + } + } + }, + "url": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json new file mode 100644 index 0000000000000..3440bcab5a376 --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json @@ -0,0 +1,151 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "checkout_session_id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "amount_subtotal": { "type": ["null", "integer"] }, + "amount_total": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "discounts": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "type": ["null", "integer"] }, + "discount": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "coupon": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "amount_off": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "duration": { "type": ["null", "string"] }, + "duration_in_months": { "type": ["null", "integer"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "name": { "type": ["null", "string"] }, + "percent_off": { "type": ["null", "number"] }, + "object": { "type": ["null", "string"] }, + "applies_to": { + "type": ["null", "object"], + "properties": { + "products": { + "type": ["null", "array"], + "items": { "type": ["null", "string"] } + } + } + }, + "created": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "max_redemptions": { "type": ["null", "integer"] }, + "redeem_by": { "type": ["null", "integer"] }, + "times_redeemed": { "type": ["null", "integer"] }, + "valid": { "type": ["null", "boolean"] } + } + }, + "customer": { "type": ["null", "string"] }, + "end": { "type": ["null", "integer"] }, + "start": { "type": ["null", "integer"] }, + "subscription": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "checkout_session": { "type": ["null", "string"] }, + "invoice": { "type": ["null", "string"] }, + "invoice_item": { "type": ["null", "string"] }, + "promotion_code": { "type": ["null", "string"] } + } + } + } + } + }, + "price": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "billing_scheme": { "type": ["null", "string"] }, + "created": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "livemode": { "type": ["null", "boolean"] }, + "lookup_key": { "type": ["null", "string"] }, + "metadata": { + "type": 
["null", "object"], + "properties": {} + }, + "nickname": { "type": ["null", "string"] }, + "product": { "type": ["null", "string"] }, + "recurring": { + "type": ["null", "object"], + "properties": { + "aggregate_usage": { "type": ["null", "string"] }, + "interval": { "type": ["null", "string"] }, + "interval_count": { "type": ["null", "integer"] }, + "usage_type": { "type": ["null", "string"] } + } + }, + "tax_behavior": { "type": ["null", "string"] }, + "tiers": { + "type": ["null", "object"], + "properties": { + "flat_amount": { "type": ["null", "integer"] }, + "flat_amount_decimal": { "type": ["null", "string"] }, + "unit_amount": { "type": ["null", "integer"] }, + "unit_amount_decimal": { "type": ["null", "string"] }, + "up_to": { "type": ["null", "integer"] } + } + }, + "tiers_mode": { "type": ["null", "string"] }, + "transform_quantity": { + "type": ["null", "object"], + "properties": { + "divide_by": { "type": ["null", "integer"] }, + "round": { "type": ["null", "string"] } + } + }, + "type": { "type": ["null", "string"] }, + "unit_amount": { "type": ["null", "integer"] }, + "unit_amount_decimal": { "type": ["null", "string"] } + } + }, + "quantity": { "type": ["null", "integer"] }, + "taxes": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "types": ["null", "integer"] }, + "rate": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "country": { "type": ["null", "string"] }, + "created": { "type": ["null", "integer"] }, + "description": { "type": ["null", "string"] }, + "display_name": { "type": ["null", "string"] }, + "inclusive": { "type": ["null", "boolean"] }, + "jurisdiction": { "type": ["null", "string"] }, + "livemode": { "type": ["null", "boolean"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "percentage": { "type": ["null", "number"] }, + "state": { "type": ["null", "string"] }, + "tax_type": { "type": ["null", "string"] } + } + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json new file mode 100644 index 0000000000000..030254e5a0ab1 --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json @@ -0,0 +1,59 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "code": { "type": ["null", "string"] }, + "coupon": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "amount_off": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "duration": { "type": ["null", "string"] }, + "duration_in_months": { "type": ["null", "integer"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "name": { "type": ["null", "string"] }, + "percent_off": { "type": ["null", "number"] }, + "object": { "type": ["null", "string"] }, + "applies_to": { + "type": ["null", "object"], + "properties": { + "products": { + "type": ["null", "array"], + "items": { "type": ["null", "string"] } + } + } + }, + "created": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "max_redemptions": { "type": ["null", "integer"] }, + "redeem_by": { "type": ["null", "integer"] }, + 
"times_redeemed": { "type": ["null", "integer"] }, + "valid": { "type": ["null", "boolean"] } + } + }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "created": { "type": ["null", "integer"] }, + "customer": { "type": ["null", "string"] }, + "expires_at": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "max_redemptions": { "type": ["null", "integer"] }, + "restrictions": { + "type": ["null", "object"], + "properties": { + "first_time_transaction": { "type": ["null", "boolean"] }, + "minimum_amount": { "type": ["null", "integer"] }, + "minimum_amount_currency": { "type": ["null", "string"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py index 3063b3ae52d57..6ca0a21320dba 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py @@ -15,6 +15,8 @@ BalanceTransactions, BankAccounts, Charges, + CheckoutSessions, + CheckoutSessionsLineItems, Coupons, CustomerBalanceTransactions, Customers, @@ -27,6 +29,7 @@ Payouts, Plans, Products, + PromotionCodes, Refunds, SubscriptionItems, Subscriptions, @@ -52,6 +55,8 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: BalanceTransactions(**incremental_args), BankAccounts(**args), Charges(**incremental_args), + CheckoutSessions(**args), + CheckoutSessionsLineItems(**args), Coupons(**incremental_args), CustomerBalanceTransactions(**args), Customers(**incremental_args), @@ -64,6 +69,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: Payouts(**incremental_args), Plans(**incremental_args), Products(**incremental_args), + PromotionCodes(**incremental_args), Refunds(**incremental_args), SubscriptionItems(**args), Subscriptions(**incremental_args), diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py index 472c8533ef5d6..febf99b61a424 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py @@ -2,7 +2,6 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
# - import math from abc import ABC, abstractmethod from typing import Any, Iterable, Mapping, MutableMapping, Optional @@ -348,3 +347,56 @@ def read_records(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwarg customers_stream = Customers(authenticator=self.authenticator, account_id=self.account_id, start_date=self.start_date) for customer in customers_stream.read_records(sync_mode=SyncMode.full_refresh): yield from super().read_records(stream_slice={"customer_id": customer["id"]}, **kwargs) + + +class CheckoutSessions(StripeStream): + """ + API docs: https://stripe.com/docs/api/checkout/sessions/list + """ + + name = "checkout_sessions" + + def path(self, **kwargs): + return "checkout/sessions" + + +class CheckoutSessionsLineItems(StripeStream): + """ + API docs: https://stripe.com/docs/api/checkout/sessions/line_items + """ + + name = "checkout_sessions_line_items" + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): + return f"checkout/sessions/{stream_slice['checkout_session_id']}/line_items" + + def read_records(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Mapping[str, Any]]: + checkout_session_stream = CheckoutSessions(authenticator=self.authenticator, account_id=self.account_id, start_date=self.start_date) + for checkout_session in checkout_session_stream.read_records(sync_mode=SyncMode.full_refresh): + yield from super().read_records(stream_slice={"checkout_session_id": checkout_session["id"]}, **kwargs) + + def request_params(self, stream_slice: Mapping[str, Any] = None, **kwargs): + params = super().request_params(stream_slice=stream_slice, **kwargs) + params["expand[]"] = "data.discounts" + return params + + def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: + + response_json = response.json() + data = response_json.get("data", []) + if data and stream_slice: + cs_id = stream_slice.get("checkout_session_id", None) + for e in data: + e["checkout_session_id"] = cs_id + yield from data + + +class PromotionCodes(IncrementalStripeStream): + """ + API docs: https://stripe.com/docs/api/promotion_codes/list + """ + + cursor_field = "created" + + def path(self, **kwargs): + return "promotion_codes" diff --git a/airbyte-integrations/connectors/source-zendesk-support/Dockerfile b/airbyte-integrations/connectors/source-zendesk-support/Dockerfile index f44e3e602d74d..42475d2905f52 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/Dockerfile +++ b/airbyte-integrations/connectors/source-zendesk-support/Dockerfile @@ -25,5 +25,5 @@ COPY source_zendesk_support ./source_zendesk_support ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-zendesk-support diff --git a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json index 454ab85dffc8a..a139c863d2b91 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json +++ b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json @@ -132,7 +132,7 @@ }, "initially_assigned_at": { "type": ["null", "string"], - "format": "datetime" + "format": "date-time" }, 
"assigned_at": { "type": ["null", "string"], diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 420e60bd1227f..5aad98731a67b 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -66,10 +66,9 @@ public void ensure(final JsonNode schemaJson, final JsonNode objectJson) throws } throw new JsonValidationException(String.format( - "json schema validation failed. \nerrors: %s \nschema: \n%s \nobject: \n%s", + "json schema validation failed when comparing the data to the json schema. \nErrors: %s \nSchema: \n%s", Strings.join(validationMessages, ", "), - schemaJson.toPrettyString(), - objectJson.toPrettyString())); + schemaJson.toPrettyString())); } public void ensureAsRuntime(final JsonNode schemaJson, final JsonNode objectJson) { diff --git a/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java b/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java index b13080542e103..38bb4224f4882 100644 --- a/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java +++ b/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java @@ -23,7 +23,7 @@ import java.util.stream.Stream; /** - * This migration fixes a mistake. We should have done a minor version bump from 0.14.2 => 0.14.3 + * This migration fixes a mistake. We should have done a minor version bump from 0.14.2 to 0.14.3 * but we did not. This migration cleans up any problems that might have arisen from that. Then we * will do another migration to 0.15 forcing everyone to migrate (guaranteeing they hit this one) * and getting into a good state. The only change here is that instead of using StandardDataSchema diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index 02a8b8af83027..7e03902ce0788 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -14,10 +14,8 @@ import java.io.IOException; import java.lang.reflect.Type; import java.net.URI; -import java.net.URISyntaxException; import java.net.URLEncoder; import java.net.http.HttpClient; -import java.net.http.HttpClient.Version; import java.net.http.HttpRequest; import java.net.http.HttpResponse; import java.nio.charset.StandardCharsets; @@ -27,13 +25,16 @@ import java.util.function.Function; import java.util.function.Supplier; import org.apache.commons.lang3.RandomStringUtils; -import org.apache.http.client.utils.URIBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /* * Class implementing generic oAuth 2.0 flow. 
*/ public abstract class BaseOAuthFlow extends BaseOAuthConfig { + private static final Logger LOGGER = LoggerFactory.getLogger(BaseOAuthFlow.class); + /** * Simple enum of content type strings and their respective encoding functions used for POSTing the * access token request @@ -53,19 +54,12 @@ public enum TOKEN_REQUEST_CONTENT_TYPE { } - protected final HttpClient httpClient; private final TOKEN_REQUEST_CONTENT_TYPE tokenReqContentType; + protected HttpClient httpClient; private final Supplier stateSupplier; - public BaseOAuthFlow(final ConfigRepository configRepository) { - this(configRepository, HttpClient.newBuilder().version(Version.HTTP_1_1).build(), BaseOAuthFlow::generateRandomState); - } - - public BaseOAuthFlow(ConfigRepository configRepository, TOKEN_REQUEST_CONTENT_TYPE tokenReqContentType) { - this(configRepository, - HttpClient.newBuilder().version(Version.HTTP_1_1).build(), - BaseOAuthFlow::generateRandomState, - tokenReqContentType); + public BaseOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + this(configRepository, httpClient, BaseOAuthFlow::generateRandomState); } public BaseOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { @@ -96,31 +90,6 @@ public String getDestinationConsentUrl(final UUID workspaceId, final UUID destin return formatConsentUrl(destinationDefinitionId, getClientIdUnsafe(oAuthParamConfig), redirectUrl); } - protected String formatConsentUrl(String clientId, - String redirectUrl, - String host, - String path, - String scope, - String responseType) - throws IOException { - final URIBuilder builder = new URIBuilder() - .setScheme("https") - .setHost(host) - .setPath(path) - // required - .addParameter("client_id", clientId) - .addParameter("redirect_uri", redirectUrl) - .addParameter("state", getState()) - // optional - .addParameter("response_type", responseType) - .addParameter("scope", scope); - try { - return builder.build().toString(); - } catch (URISyntaxException e) { - throw new IOException("Failed to format Consent URL for OAuth flow", e); - } - } - /** * Depending on the OAuth flow implementation, the URL to grant user's consent may differ, * especially in the query parameters to be provided. This function should generate such consent URL @@ -183,9 +152,9 @@ protected Map completeOAuthFlow(final String clientId, .header("Content-Type", tokenReqContentType.contentType) .header("Accept", "application/json") .build(); - // TODO: Handle error response to report better messages try { - final HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + HttpResponse response; + response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); return extractRefreshToken(Jsons.deserialize(response.body()), accessTokenUrl); } catch (final InterruptedException e) { throw new IOException("Failed to complete OAuth flow", e); @@ -230,7 +199,9 @@ protected Map extractRefreshToken(final JsonNode data, String ac } else if (data.has("access_token")) { result.put("access_token", data.get("access_token").asText()); } else { - throw new IOException(String.format("Missing 'refresh_token' in query params from %s", accessTokenUrl)); + LOGGER.info("Oauth flow failed. Data received from server: {}", data); + throw new IOException(String.format("Missing 'refresh_token' in query params from %s. 
Response: %s", accessTokenUrl)); + } return Map.of("credentials", result); @@ -257,7 +228,9 @@ private static String toUrlEncodedString(final Map body) { protected static String toJson(final Map body) { final Gson gson = new Gson(); - Type gsonType = new TypeToken>() {}.getType(); + Type gsonType = new TypeToken>() { + + }.getType(); return gson.toJson(body, gsonType); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index f9f450c286d53..1f269911d46bc 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -6,11 +6,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.oauth.flows.AsanaOAuthFlow; -import io.airbyte.oauth.flows.GithubOAuthFlow; -import io.airbyte.oauth.flows.SalesforceOAuthFlow; -import io.airbyte.oauth.flows.SurveymonkeyOAuthFlow; -import io.airbyte.oauth.flows.TrelloOAuthFlow; +import io.airbyte.oauth.flows.*; import io.airbyte.oauth.flows.facebook.FacebookMarketingOAuthFlow; import io.airbyte.oauth.flows.facebook.FacebookPagesOAuthFlow; import io.airbyte.oauth.flows.facebook.InstagramOAuthFlow; @@ -18,6 +14,7 @@ import io.airbyte.oauth.flows.google.GoogleAnalyticsOAuthFlow; import io.airbyte.oauth.flows.google.GoogleSearchConsoleOAuthFlow; import io.airbyte.oauth.flows.google.GoogleSheetsOAuthFlow; +import java.net.http.HttpClient; import java.util.Map; import java.util.UUID; @@ -25,20 +22,21 @@ public class OAuthImplementationFactory { private final Map OAUTH_FLOW_MAPPING; - public OAuthImplementationFactory(final ConfigRepository configRepository) { + public OAuthImplementationFactory(final ConfigRepository configRepository, final HttpClient httpClient) { OAUTH_FLOW_MAPPING = ImmutableMap.builder() - .put("airbyte/source-asana", new AsanaOAuthFlow(configRepository)) - .put("airbyte/source-facebook-marketing", new FacebookMarketingOAuthFlow(configRepository)) - .put("airbyte/source-facebook-pages", new FacebookPagesOAuthFlow(configRepository)) - .put("airbyte/source-github", new GithubOAuthFlow(configRepository)) - .put("airbyte/source-google-ads", new GoogleAdsOAuthFlow(configRepository)) - .put("airbyte/source-google-analytics-v4", new GoogleAnalyticsOAuthFlow(configRepository)) - .put("airbyte/source-google-search-console", new GoogleSearchConsoleOAuthFlow(configRepository)) - .put("airbyte/source-google-sheets", new GoogleSheetsOAuthFlow(configRepository)) - .put("airbyte/source-instagram", new InstagramOAuthFlow(configRepository)) - .put("airbyte/source-salesforce", new SalesforceOAuthFlow(configRepository)) - .put("airbyte/source-surveymonkey", new SurveymonkeyOAuthFlow(configRepository)) - .put("airbyte/source-trello", new TrelloOAuthFlow(configRepository)) + .put("airbyte/source-asana", new AsanaOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-facebook-marketing", new FacebookMarketingOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-facebook-pages", new FacebookPagesOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-github", new GithubOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-ads", new GoogleAdsOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-analytics-v4", new GoogleAnalyticsOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-search-console", 
new GoogleSearchConsoleOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-sheets", new GoogleSheetsOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-instagram", new InstagramOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-salesforce", new SalesforceOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-surveymonkey", new SurveymonkeyOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-trello", new TrelloOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-hubspot", new HubspotOAuthFlow(configRepository, httpClient)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java index 9e273b82b133d..c6a2ec9273c6d 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java @@ -4,7 +4,9 @@ package io.airbyte.oauth.flows; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.oauth.BaseOAuthFlow; @@ -24,8 +26,8 @@ public class AsanaOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://app.asana.com/-/oauth_authorize"; private static final String ACCESS_TOKEN_URL = "https://app.asana.com/-/oauth_token"; - public AsanaOAuthFlow(ConfigRepository configRepository) { - super(configRepository); + public AsanaOAuthFlow(ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting @@ -60,4 +62,18 @@ protected Map getAccessTokenQueryParameters(String clientId, Str .build(); } + @Override + protected String getClientIdUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientIdUnsafe(config.get("credentials")); + } + + @Override + protected String getClientSecretUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientSecretUnsafe(config.get("credentials")); + } + } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java index bb2e14ac11f6c..4b6f4932ea209 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java @@ -26,8 +26,8 @@ public class GithubOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://github.com/login/oauth/authorize"; private static final String ACCESS_TOKEN_URL = "https://github.com/login/oauth/access_token"; - public GithubOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GithubOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java new file mode 100644 index 
0000000000000..45139504d2816 --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuthFlow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +public class HubspotOAuthFlow extends BaseOAuthFlow { + + private final String AUTHORIZE_URL = "https://app.hubspot.com/oauth/authorize"; + + public HubspotOAuthFlow(ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); + } + + public HubspotOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier, TOKEN_REQUEST_CONTENT_TYPE.JSON); + } + + /** + * Depending on the OAuth flow implementation, the URL to grant user's consent may differ, + * especially in the query parameters to be provided. This function should generate such consent URL + * accordingly. + * + * @param definitionId The configured definition ID of this client + * @param clientId The configured client ID + * @param redirectUrl the redirect URL + */ + @Override + protected String formatConsentUrl(UUID definitionId, String clientId, String redirectUrl) throws IOException { + try { + return new URIBuilder(AUTHORIZE_URL) + .addParameter("client_id", clientId) + .addParameter("redirect_uri", redirectUrl) + .addParameter("state", getState()) + .addParameter("scopes", getScopes()) + .build().toString(); + } catch (URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, String clientSecret, String authCode, String redirectUrl) { + return ImmutableMap.builder() + // required + .put("client_id", clientId) + .put("redirect_uri", redirectUrl) + .put("client_secret", clientSecret) + .put("code", authCode) + .put("grant_type", "authorization_code") + .build(); + } + + private String getScopes() { + return String.join(" ", "content", + "crm.schemas.deals.read", + "crm.objects.owners.read", + "forms", + "tickets", + "e-commerce", + "crm.objects.companies.read", + "crm.lists.read", + "crm.objects.deals.read", + "crm.schemas.contacts.read", + "crm.objects.contacts.read", + "crm.schemas.companies.read", + "files", + "forms-uploaded-files", + "files.ui_hidden.read"); + } + + /** + * Returns the URL where to retrieve the access token from. 
+ * + * @param oAuthParamConfig the configuration map + */ + @Override + protected String getAccessTokenUrl() { + return "https://api.hubapi.com/oauth/v1/token"; + } + + @Override + protected String getClientIdUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientIdUnsafe(config.get("credentials")); + } + + @Override + protected String getClientSecretUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientSecretUnsafe(config.get("credentials")); + } + +} diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java index 22fdb6dbd7912..ea1fd7c154a72 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java @@ -20,24 +20,26 @@ /** * Following docs from - * https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm + * https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm */ public class SalesforceOAuthFlow extends BaseOAuthFlow { + // Clickable link for IDE + // https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm private static final String AUTHORIZE_URL = "https://login.salesforce.com/services/oauth2/authorize"; private static final String ACCESS_TOKEN_URL = "https://login.salesforce.com/services/oauth2/token"; - public SalesforceOAuthFlow(ConfigRepository configRepository) { - super(configRepository); + public SalesforceOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting - SalesforceOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { + SalesforceOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier stateSupplier) { super(configRepository, httpClient, stateSupplier); } @Override - protected String formatConsentUrl(UUID definitionId, String clientId, String redirectUrl) throws IOException { + protected String formatConsentUrl(final UUID definitionId, final String clientId, final String redirectUrl) throws IOException { try { return new URIBuilder(AUTHORIZE_URL) .addParameter("client_id", clientId) @@ -45,7 +47,7 @@ protected String formatConsentUrl(UUID definitionId, String clientId, String red .addParameter("response_type", "code") .addParameter("state", getState()) .build().toString(); - } catch (URISyntaxException e) { + } catch (final URISyntaxException e) { throw new IOException("Failed to format Consent URL for OAuth flow", e); } } @@ -56,7 +58,10 @@ protected String getAccessTokenUrl() { } @Override - protected Map getAccessTokenQueryParameters(String clientId, String clientSecret, String authCode, String redirectUrl) { + protected Map getAccessTokenQueryParameters(final String clientId, + final String clientSecret, + final String authCode, + final String redirectUrl) { return ImmutableMap.builder() .putAll(super.getAccessTokenQueryParameters(clientId, clientSecret, authCode, redirectUrl)) .put("grant_type", "authorization_code") @@ -64,7 +69,7 @@ 
protected Map getAccessTokenQueryParameters(String clientId, Str } @Override - protected Map extractRefreshToken(JsonNode data, String accessTokenUrl) throws IOException { + protected Map extractRefreshToken(final JsonNode data, final String accessTokenUrl) throws IOException { System.out.println(Jsons.serialize(data)); if (data.has("refresh_token")) { final String refreshToken = data.get("refresh_token").asText(); diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java index 9cb40ef924928..9bf22e762a218 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java @@ -26,8 +26,8 @@ public class SurveymonkeyOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://api.surveymonkey.com/oauth/authorize"; private static final String ACCESS_TOKEN_URL = "https://api.surveymonkey.com/oauth/token"; - public SurveymonkeyOAuthFlow(ConfigRepository configRepository) { - super(configRepository); + public SurveymonkeyOAuthFlow(ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java index 1273f46e23414..81b5db8d559b3 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java @@ -17,6 +17,7 @@ import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.oauth.BaseOAuthConfig; import java.io.IOException; +import java.net.http.HttpClient; import java.util.Map; import java.util.UUID; @@ -38,7 +39,7 @@ public class TrelloOAuthFlow extends BaseOAuthConfig { private static final OAuthHmacSigner signer = new OAuthHmacSigner(); private final HttpTransport transport; - public TrelloOAuthFlow(final ConfigRepository configRepository) { + public TrelloOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { super(configRepository); transport = new NetHttpTransport(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java index 0fe9832caa4e6..afe38d7f54037 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java @@ -13,8 +13,8 @@ public class FacebookMarketingOAuthFlow extends FacebookOAuthFlow { private static final String SCOPES = "ads_management,ads_read,read_insights"; - public FacebookMarketingOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public FacebookMarketingOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java index d1520cb1eabf4..08e5feeb64381 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java +++ 
b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java @@ -30,8 +30,8 @@ public abstract class FacebookOAuthFlow extends BaseOAuthFlow { private static final String ACCESS_TOKEN_URL = "https://graph.facebook.com/v12.0/oauth/access_token"; private static final String AUTH_CODE_TOKEN_URL = "https://www.facebook.com/v12.0/dialog/oauth"; - public FacebookOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public FacebookOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java index 1f5e596a18b56..8f8b832ae76e1 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java @@ -5,13 +5,14 @@ package io.airbyte.oauth.flows.facebook; import io.airbyte.config.persistence.ConfigRepository; +import java.net.http.HttpClient; public class FacebookPagesOAuthFlow extends FacebookOAuthFlow { private static final String SCOPES = "pages_manage_ads,pages_manage_metadata,pages_read_engagement,pages_read_user_content"; - public FacebookPagesOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public FacebookPagesOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @Override diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java index f4478960a9797..20edf93f3f7e0 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java @@ -5,14 +5,15 @@ package io.airbyte.oauth.flows.facebook; import io.airbyte.config.persistence.ConfigRepository; +import java.net.http.HttpClient; // Instagram Graph API require Facebook API User token public class InstagramOAuthFlow extends FacebookMarketingOAuthFlow { private static final String SCOPES = "ads_management,instagram_basic,instagram_manage_insights,read_insights"; - public InstagramOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public InstagramOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @Override diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java index eedbaf5036a06..5fda937bf6051 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java @@ -16,8 +16,8 @@ public class GoogleAdsOAuthFlow extends GoogleOAuthFlow { @VisibleForTesting static final String SCOPE_URL = "https://www.googleapis.com/auth/adwords"; - public GoogleAdsOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleAdsOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git 
a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java index 8e26336783ae8..40a6322a50654 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java @@ -15,8 +15,8 @@ public class GoogleAnalyticsOAuthFlow extends GoogleOAuthFlow { public static final String SCOPE_URL = "https://www.googleapis.com/auth/analytics.readonly"; - public GoogleAnalyticsOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleAnalyticsOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java index 1c460101f8544..500309072d467 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java @@ -23,8 +23,8 @@ public abstract class GoogleOAuthFlow extends BaseOAuthFlow { private static final String ACCESS_TOKEN_URL = "https://oauth2.googleapis.com/token"; - public GoogleOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java index 77973683446c7..a4fa700e8d56b 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java @@ -19,8 +19,8 @@ public class GoogleSearchConsoleOAuthFlow extends GoogleOAuthFlow { @VisibleForTesting static final String SCOPE_URL = "https://www.googleapis.com/auth/webmasters.readonly"; - public GoogleSearchConsoleOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleSearchConsoleOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java index 11e2dd08e88d9..bff40fc9ef389 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java @@ -18,8 +18,8 @@ public class GoogleSheetsOAuthFlow extends GoogleOAuthFlow { @VisibleForTesting static final String SCOPE_URL = "https://www.googleapis.com/auth/spreadsheets.readonly https://www.googleapis.com/auth/drive.readonly"; - public GoogleSheetsOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleSheetsOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git 
a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java index 88884ff611c59..16f2820cd3c7b 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java @@ -17,6 +17,7 @@ import io.airbyte.oauth.flows.OAuthFlowIntegrationTest; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -31,13 +32,13 @@ public class FacebookOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { protected static final String REDIRECT_URL = "http://localhost:9000/auth_flow"; @Override - protected Path get_credentials_path() { + protected Path getCredentialsPath() { return CREDENTIALS_PATH; } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new FacebookMarketingOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new FacebookMarketingOAuthFlow(configRepository, httpClient); } @BeforeEach diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java index 7d569291c3045..9677a82a5e5c0 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java @@ -16,6 +16,7 @@ import io.airbyte.oauth.OAuthFlowImplementation; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -31,13 +32,13 @@ public class GithubOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { protected static final int SERVER_LISTENING_PORT = 8000; @Override - protected Path get_credentials_path() { + protected Path getCredentialsPath() { return CREDENTIALS_PATH; } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new GithubOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new GithubOAuthFlow(configRepository, httpClient); } @Override diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java index 536ab196886d5..0cee2aaca757d 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class SalesforceOAuthFlowIntegrationTest { private SalesforceOAuthFlow 
salesforceOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - salesforceOAuthFlow = new SalesforceOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + salesforceOAuthFlow = new SalesforceOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(8000), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java index e5263ebbe1082..5791ed62b1a4c 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java @@ -16,6 +16,7 @@ import io.airbyte.oauth.OAuthFlowImplementation; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -30,13 +31,13 @@ public class SurveymonkeyOAuthFlowIntegrationTest extends OAuthFlowIntegrationTe protected static final String REDIRECT_URL = "http://localhost:3000/auth_flow"; @Override - protected Path get_credentials_path() { + protected Path getCredentialsPath() { return CREDENTIALS_PATH; } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new SurveymonkeyOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new SurveymonkeyOAuthFlow(configRepository, httpClient); } @BeforeEach diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java index 66fb6691347f8..57a0d2e883e2e 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class TrelloOAuthFlowIntegrationTest { private TrelloOAuthFlow trelloOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - trelloOAuthFlow = new TrelloOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + trelloOAuthFlow = new TrelloOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(8000), 0); server.setExecutor(null); // creates a default executor diff --git 
a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java new file mode 100644 index 0000000000000..234f31454dfb7 --- /dev/null +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.SourceOAuthParameter; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.OAuthFlowImplementation; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.net.http.HttpClient; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.Test; + +public class HubspotOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { + + @Override + protected Path getCredentialsPath() { + return Path.of("secrets/hubspot.json"); + } + + @Override + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new HubspotOAuthFlow(configRepository, httpClient); + } + + @Test + public void testFullOAuthFlow() throws InterruptedException, ConfigNotFoundException, IOException, JsonValidationException { + int limit = 100; + final UUID workspaceId = UUID.randomUUID(); + final UUID definitionId = UUID.randomUUID(); + final String fullConfigAsString = new String(Files.readAllBytes(getCredentialsPath())); + final JsonNode credentialsJson = Jsons.deserialize(fullConfigAsString); + when(configRepository.listSourceOAuthParam()).thenReturn(List.of(new SourceOAuthParameter() + .withOauthParameterId(UUID.randomUUID()) + .withSourceDefinitionId(definitionId) + .withWorkspaceId(workspaceId) + .withConfiguration(Jsons.jsonNode(ImmutableMap.builder() + .put("client_id", credentialsJson.get("credentials").get("client_id").asText()) + .put("client_secret", credentialsJson.get("credentials").get("client_secret").asText()) + .build())))); + var flowObject = getFlowImplementation(configRepository, httpClient); + final String url = flowObject.getSourceConsentUrl(workspaceId, definitionId, REDIRECT_URL); + LOGGER.info("Waiting for user consent at: {}", url); + // TODO: To automate, start a selenium job to navigate to the Consent URL and click on allowing + // access... 
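Editorial aside (not part of the diff): the change repeated across every OAuth flow in this section is constructor injection of a shared `java.net.http.HttpClient`. The sketch below is a minimal illustration of how a caller now wires a flow together, borrowing the class names and the stubbed `ConfigRepository` setup from the integration tests above; the redirect URL and credential values are placeholders, and the flat `client_id`/`client_secret` configuration shape simply mirrors the Hubspot integration test shown here.

```java
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;
import io.airbyte.config.SourceOAuthParameter;
import io.airbyte.config.persistence.ConfigRepository;
import io.airbyte.oauth.OAuthFlowImplementation;
import io.airbyte.oauth.flows.HubspotOAuthFlow;
import java.net.http.HttpClient;
import java.util.List;
import java.util.Map;
import java.util.UUID;

// Illustrative sketch only: wiring an OAuth flow with an injected HttpClient.
public class OAuthFlowWiringSketch {

  public static void main(final String[] args) throws Exception {
    // The integration tests build the client explicitly and pin it to HTTP/1.1.
    final HttpClient httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build();

    // A mocked repository stands in for the config store, mirroring the test setup above.
    final ConfigRepository configRepository = mock(ConfigRepository.class);
    final UUID workspaceId = UUID.randomUUID();
    final UUID definitionId = UUID.randomUUID();
    when(configRepository.listSourceOAuthParam()).thenReturn(List.of(new SourceOAuthParameter()
        .withOauthParameterId(UUID.randomUUID())
        .withSourceDefinitionId(definitionId)
        .withWorkspaceId(workspaceId)
        .withConfiguration(Jsons.jsonNode(ImmutableMap.builder()
            .put("client_id", "placeholder_client_id")
            .put("client_secret", "placeholder_client_secret")
            .build()))));

    // The flow no longer constructs its own HTTP client; it receives the shared instance.
    final OAuthFlowImplementation flow = new HubspotOAuthFlow(configRepository, httpClient);

    // Step 1: direct the user to the consent URL; step 2: trade the returned code for credentials.
    final String redirectUrl = "http://localhost:9000/auth_flow"; // placeholder value
    final String consentUrl = flow.getSourceConsentUrl(workspaceId, definitionId, redirectUrl);
    System.out.println("Consent URL: " + consentUrl);

    final Map<String, Object> credentials =
        flow.completeSourceOAuth(workspaceId, definitionId, Map.of("code", "<authorization-code>"), redirectUrl);
    System.out.println(credentials);
  }
}
```

Injecting the client is also what lets the new unit tests (for example the `HubspotOAuthFlowTest` added later in this diff) stub `httpClient.send(...)` with a mocked `HttpResponse` instead of standing up a real token endpoint.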
+ while (!serverHandler.isSucceeded() && limit > 0) { + Thread.sleep(1000); + limit -= 1; + } + assertTrue(serverHandler.isSucceeded(), "Failed to get User consent on time"); + final Map params = flowObject.completeSourceOAuth(workspaceId, definitionId, + Map.of("code", serverHandler.getParamValue()), REDIRECT_URL); + LOGGER.info("Response from completing OAuth Flow is: {}", params.toString()); + assertTrue(params.containsKey("credentials")); + final Map credentials = (Map) params.get("credentials"); + assertTrue(credentials.containsKey("refresh_token")); + assertTrue(credentials.get("refresh_token").toString().length() > 0); + assertTrue(credentials.containsKey("access_token")); + assertTrue(credentials.get("access_token").toString().length() > 0); + } + +} diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java index c2d64d6c2e154..67be077899e4b 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java @@ -14,6 +14,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -33,24 +34,27 @@ public abstract class OAuthFlowIntegrationTest { protected static final String REDIRECT_URL = "http://localhost/auth_flow"; protected static final int SERVER_LISTENING_PORT = 80; + protected HttpClient httpClient; protected ConfigRepository configRepository; protected OAuthFlowImplementation flow; protected HttpServer server; protected ServerHandler serverHandler; - protected abstract Path get_credentials_path(); + protected Path getCredentialsPath() { + return Path.of("secrets/config.json"); + }; - protected abstract OAuthFlowImplementation getFlowObject(ConfigRepository configRepository); + protected abstract OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient); @BeforeEach public void setup() throws IOException { - if (!Files.exists(get_credentials_path())) { + if (!Files.exists(getCredentialsPath())) { throw new IllegalStateException( "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - - flow = this.getFlowObject(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + flow = this.getFlowImplementation(configRepository, httpClient); System.out.println(getServerListeningPort()); server = HttpServer.create(new InetSocketAddress(getServerListeningPort()), 0); diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java index a9c1ddfb31d50..29d6d909dba7d 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class 
GoogleAdsOAuthFlowIntegrationTest { private GoogleAdsOAuthFlow googleAdsOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleAdsOAuthFlow = new GoogleAdsOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleAdsOAuthFlow = new GoogleAdsOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java index b7683d8c34921..499f8ad56c0b4 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleAnalyticsOAuthFlowIntegrationTest { private GoogleAnalyticsOAuthFlow googleAnalyticsOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleAnalyticsOAuthFlow = new GoogleAnalyticsOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleAnalyticsOAuthFlow = new GoogleAnalyticsOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java index 92812c139bd79..886ba1c91f835 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleSearchConsoleOAuthFlowIntegrationTest { private GoogleSearchConsoleOAuthFlow googleSearchConsoleOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleSearchConsoleOAuthFlow = 
new GoogleSearchConsoleOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleSearchConsoleOAuthFlow = new GoogleSearchConsoleOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java index 3d4a84b44ee30..24f4ae1d9a94a 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleSheetsOAuthFlowIntegrationTest { private GoogleSheetsOAuthFlow googleSheetsOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleSheetsOAuthFlow = new GoogleSheetsOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleSheetsOAuthFlow = new GoogleSheetsOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java index 4119254b80e9a..13071ab096751 100644 --- a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java @@ -48,10 +48,10 @@ public void setup() throws IOException, JsonValidationException { .withOauthParameterId(UUID.randomUUID()) .withSourceDefinitionId(definitionId) .withWorkspaceId(workspaceId) - .withConfiguration(Jsons.jsonNode(ImmutableMap.builder() + .withConfiguration(Jsons.jsonNode(Map.of("credentials", ImmutableMap.builder() .put("client_id", "test_client_id") .put("client_secret", "test_client_secret") - .build())))); + .build()))))); asanaoAuthFlow = new AsanaOAuthFlow(configRepository, httpClient, AsanaOAuthFlowTest::getConstantState); } diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java new file mode 100644 index 0000000000000..46c5272898e51 --- /dev/null +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.oauth.flows; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.SourceOAuthParameter; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.net.http.HttpClient; +import java.net.http.HttpResponse; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class HubspotOAuthFlowTest { + + private UUID workspaceId; + private UUID definitionId; + private ConfigRepository configRepository; + private HubspotOAuthFlow flow; + private HttpClient httpClient; + + private static final String REDIRECT_URL = "https://airbyte.io"; + + private static String getConstantState() { + return "state"; + } + + @BeforeEach + public void setup() throws IOException, JsonValidationException { + workspaceId = UUID.randomUUID(); + definitionId = UUID.randomUUID(); + configRepository = mock(ConfigRepository.class); + httpClient = mock(HttpClient.class); + when(configRepository.listSourceOAuthParam()).thenReturn(List.of(new SourceOAuthParameter() + .withOauthParameterId(UUID.randomUUID()) + .withSourceDefinitionId(definitionId) + .withWorkspaceId(workspaceId) + .withConfiguration(Jsons.jsonNode(Map.of("credentials", ImmutableMap.builder() + .put("client_id", "test_client_id") + .put("client_secret", "test_client_secret") + .build()))))); + flow = new HubspotOAuthFlow(configRepository, httpClient, HubspotOAuthFlowTest::getConstantState); + + } + + @Test + public void testGetSourceConcentUrl() throws IOException, ConfigNotFoundException { + final String concentUrl = + flow.getSourceConsentUrl(workspaceId, definitionId, REDIRECT_URL); + assertEquals(concentUrl, + "https://app.hubspot.com/oauth/authorize?client_id=test_client_id&redirect_uri=https%3A%2F%2Fairbyte.io&state=state&scopes=content+crm.schemas.deals.read+crm.objects.owners.read+forms+tickets+e-commerce+crm.objects.companies.read+crm.lists.read+crm.objects.deals.read+crm.schemas.contacts.read+crm.objects.contacts.read+crm.schemas.companies.read+files+forms-uploaded-files+files.ui_hidden.read"); + } + + @Test + public void testCompleteSourceOAuth() throws IOException, InterruptedException, ConfigNotFoundException { + final var response = mock(HttpResponse.class); + var returnedCredentials = "{\"refresh_token\":\"refresh_token_response\"}"; + when(response.body()).thenReturn(returnedCredentials); + when(httpClient.send(any(), any())).thenReturn(response); + final Map queryParams = Map.of("code", "test_code"); + final Map actualQueryParams = + flow.completeSourceOAuth(workspaceId, definitionId, queryParams, REDIRECT_URL); + assertEquals(Jsons.serialize(Map.of("credentials", Jsons.deserialize(returnedCredentials))), Jsons.serialize(actualQueryParams)); + } + +} diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 8984019c79a17..9e6c32336682b 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,9 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -COPY build/distributions/${APPLICATION}-0*.tar ${APPLICATION}.tar - -RUN tar xf ${APPLICATION}.tar 
--strip-components=1 +ADD build/distributions/${APPLICATION}-0.30.34-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.34-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/build.gradle b/airbyte-scheduler/app/build.gradle index ddd6a0cd06996..d40206efb3ae9 100644 --- a/airbyte-scheduler/app/build.gradle +++ b/airbyte-scheduler/app/build.gradle @@ -3,7 +3,7 @@ plugins { } dependencies { - implementation 'io.fabric8:kubernetes-client:5.5.0' + implementation 'io.fabric8:kubernetes-client:5.9.0' implementation 'io.kubernetes:client-java-api:10.0.0' implementation 'io.kubernetes:client-java:10.0.0' implementation 'io.kubernetes:client-java-extended:10.0.0' diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java index 839d3dca4a949..c35a56fc57779 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java @@ -8,8 +8,10 @@ import com.google.common.collect.Sets; import io.airbyte.commons.concurrency.LifecycledCallable; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.app.worker_run.TemporalWorkerRunFactory; import io.airbyte.scheduler.app.worker_run.WorkerRun; import io.airbyte.scheduler.models.Job; @@ -35,6 +37,8 @@ public class JobSubmitter implements Runnable { private final TemporalWorkerRunFactory temporalWorkerRunFactory; private final JobTracker jobTracker; private final JobNotifier jobNotifier; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; // See attemptJobSubmit() to understand the need for this Concurrent Set. private final Set runningJobs = Sets.newConcurrentHashSet(); @@ -43,12 +47,16 @@ public JobSubmitter(final ExecutorService threadPool, final JobPersistence persistence, final TemporalWorkerRunFactory temporalWorkerRunFactory, final JobTracker jobTracker, - final JobNotifier jobNotifier) { + final JobNotifier jobNotifier, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this.threadPool = threadPool; this.persistence = persistence; this.temporalWorkerRunFactory = temporalWorkerRunFactory; this.jobTracker = jobTracker; this.jobNotifier = jobNotifier; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; } @Override @@ -101,6 +109,7 @@ synchronized private Consumer attemptJobSubmit() { @VisibleForTesting void submitJob(final Job job) { + final WorkerRun workerRun = temporalWorkerRunFactory.create(job); // we need to know the attempt number before we begin the job lifecycle. thus we state what the // attempt number should be. if it is not, that the lifecycle will fail. 
this should not happen as @@ -114,7 +123,7 @@ void submitJob(final Job job) { final Path logFilePath = workerRun.getJobRoot().resolve(LogClientSingleton.LOG_FILENAME); final long persistedAttemptId = persistence.createAttempt(job.getId(), logFilePath); assertSameIds(attemptNumber, persistedAttemptId); - LogClientSingleton.setJobMdc(workerRun.getJobRoot()); + LogClientSingleton.getInstance().setJobMdc(workerEnvironment, logConfigs, workerRun.getJobRoot()); }) .setOnSuccess(output -> { LOGGER.debug("Job id {} succeeded", job.getId()); diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java index 5081a5b5f832d..7830b35c0ef9b 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java @@ -15,8 +15,10 @@ import io.airbyte.commons.concurrency.GracefulShutdownHandler; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.EnvConfigs; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; @@ -81,6 +83,8 @@ public class SchedulerApp { private final int submitterNumThreads; private final int maxSyncJobAttempts; private final String airbyteVersionOrWarnings; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; public SchedulerApp(final Path workspaceRoot, final JobPersistence jobPersistence, @@ -90,7 +94,9 @@ public SchedulerApp(final Path workspaceRoot, final TemporalClient temporalClient, final Integer submitterNumThreads, final Integer maxSyncJobAttempts, - final String airbyteVersionOrWarnings) { + final String airbyteVersionOrWarnings, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this.workspaceRoot = workspaceRoot; this.jobPersistence = jobPersistence; this.configRepository = configRepository; @@ -100,6 +106,8 @@ public SchedulerApp(final Path workspaceRoot, this.submitterNumThreads = submitterNumThreads; this.maxSyncJobAttempts = maxSyncJobAttempts; this.airbyteVersionOrWarnings = airbyteVersionOrWarnings; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; } public void start() throws IOException { @@ -116,7 +124,7 @@ public void start() throws IOException { jobPersistence, temporalWorkerRunFactory, new JobTracker(configRepository, jobPersistence, trackingClient), - jobNotifier); + jobNotifier, workerEnvironment, logConfigs); final Map mdc = MDC.getCopyOfContextMap(); @@ -187,7 +195,8 @@ public static void main(final String[] args) throws IOException, InterruptedExce final Configs configs = new EnvConfigs(); - LogClientSingleton.setWorkspaceMdc(LogClientSingleton.getSchedulerLogsRoot(configs)); + LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), + LogClientSingleton.getInstance().getSchedulerLogsRoot(configs.getWorkspaceRoot())); final Path workspaceRoot = configs.getWorkspaceRoot(); LOGGER.info("workspaceRoot = " + workspaceRoot); @@ -250,7 +259,7 @@ public static void main(final String[] args) throws IOException, InterruptedExce temporalClient, 
Integer.parseInt(configs.getSubmitterNumThreads()), configs.getMaxSyncJobAttempts(), - configs.getAirbyteVersionOrWarning()) + configs.getAirbyteVersionOrWarning(), configs.getWorkerEnvironment(), configs.getLogConfigs()) .start(); } diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java index 5a4b5220b004f..4da251262da22 100644 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java +++ b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java @@ -24,8 +24,10 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.MoreExecutors; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobOutput; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.scheduler.app.worker_run.TemporalWorkerRunFactory; import io.airbyte.scheduler.app.worker_run.WorkerRun; import io.airbyte.scheduler.models.Job; @@ -91,7 +93,7 @@ public void setup() throws IOException { persistence, workerRunFactory, jobTracker, - jobNotifier)); + jobNotifier, WorkerEnvironment.DOCKER, LogConfiguration.EMPTY)); } @Test diff --git a/airbyte-scheduler/client/build.gradle b/airbyte-scheduler/client/build.gradle index d90a0262c97c7..5e319c0418efa 100644 --- a/airbyte-scheduler/client/build.gradle +++ b/airbyte-scheduler/client/build.gradle @@ -5,6 +5,7 @@ plugins { dependencies { implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') + implementation project(':airbyte-config:specs') implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') implementation project(':airbyte-scheduler:models') diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java index a615643d0830e..bcdc972c2cb4a 100644 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java +++ b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java @@ -4,27 +4,17 @@ package io.airbyte.scheduler.client; -import com.fasterxml.jackson.databind.JsonNode; -import com.google.api.client.util.Preconditions; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import com.google.common.annotations.VisibleForTesting; -import io.airbyte.commons.json.Jsons; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; +import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.AirbyteCatalog; -import io.airbyte.protocol.models.AirbyteProtocolSchema; import io.airbyte.protocol.models.ConnectorSpecification; -import io.airbyte.validation.json.JsonSchemaValidator; -import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; import java.util.Optional; -import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,17 +23,15 @@ public class BucketSpecCacheSchedulerClient 
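Aside on the scheduler and server changes above: `LogClientSingleton` is now used as an instance, and its MDC helpers take the worker environment and log configuration explicitly rather than reading a whole `Configs` object. A minimal sketch of the new call shape, using only the methods visible in this diff (the `EnvConfigs` source and the job-root path are illustrative):

```java
import io.airbyte.config.Configs;
import io.airbyte.config.EnvConfigs;
import io.airbyte.config.helpers.LogClientSingleton;
import java.nio.file.Path;

// Sketch: the MDC helpers are instance methods and take environment + log config explicitly.
public class LogMdcSketch {

  public static void main(final String[] args) {
    final Configs configs = new EnvConfigs();

    // Scheduler-level logs: resolve the scheduler logs root from the workspace root.
    LogClientSingleton.getInstance().setWorkspaceMdc(
        configs.getWorkerEnvironment(),
        configs.getLogConfigs(),
        LogClientSingleton.getInstance().getSchedulerLogsRoot(configs.getWorkspaceRoot()));

    // Per-job logs: point the MDC at the job root before the attempt starts.
    final Path jobRoot = configs.getWorkspaceRoot().resolve("1").resolve("0"); // illustrative job root
    LogClientSingleton.getInstance().setJobMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), jobRoot);
  }
}
```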
implements SynchronousSchedulerClien private static final Logger LOGGER = LoggerFactory.getLogger(BucketSpecCacheSchedulerClient.class); private final SynchronousSchedulerClient client; - private final Function> bucketSpecFetcher; + private final GcsBucketSpecFetcher bucketSpecFetcher; public BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final String bucketName) { - this( - client, - dockerImage -> attemptToFetchSpecFromBucket(StorageOptions.getDefaultInstance().getService(), bucketName, dockerImage)); + this.client = client; + this.bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), bucketName); } @VisibleForTesting - BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, - final Function> bucketSpecFetcher) { + BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final GcsBucketSpecFetcher bucketSpecFetcher) { this.client = client; this.bucketSpecFetcher = bucketSpecFetcher; } @@ -72,7 +60,7 @@ public SynchronousResponse createGetSpecJob(final String Optional cachedSpecOptional; // never want to fail because we could not fetch from off board storage. try { - cachedSpecOptional = bucketSpecFetcher.apply(dockerImage); + cachedSpecOptional = bucketSpecFetcher.attemptFetch(dockerImage); LOGGER.debug("Spec bucket cache: Call to cache did not fail."); } catch (final RuntimeException e) { cachedSpecOptional = Optional.empty(); @@ -88,38 +76,4 @@ public SynchronousResponse createGetSpecJob(final String } } - private static void validateConfig(final JsonNode json) throws JsonValidationException { - final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); - final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); - jsonSchemaValidator.ensure(specJsonSchema, json); - } - - public static Optional attemptToFetchSpecFromBucket(final Storage storage, - final String bucketName, - final String dockerImage) { - final String[] dockerImageComponents = dockerImage.split(":"); - Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalidate docker image: " + dockerImage); - final String dockerImageName = dockerImageComponents[0]; - final String dockerImageTag = dockerImageComponents[1]; - - final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); - LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); - final Blob specAsBlob = storage.get(bucketName, specPath.toString()); - - // if null it means the object was not found. - if (specAsBlob == null) { - LOGGER.debug("Spec not found in bucket storage"); - return Optional.empty(); - } - - final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); - try { - validateConfig(Jsons.deserialize(specAsString)); - } catch (final JsonValidationException e) { - LOGGER.error("Received invalid spec from bucket store. 
{}", e.toString()); - return Optional.empty(); - } - return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); - } - } diff --git a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java index cf21fd2b160df..01f4595b94685 100644 --- a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java +++ b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java @@ -10,10 +10,10 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; +import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.ConnectorSpecification; import java.io.IOException; import java.util.Optional; -import java.util.function.Function; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -21,18 +21,18 @@ class BucketSpecCacheSchedulerClientTest { private SynchronousSchedulerClient defaultClientMock; - private Function> bucketSpecFetcherMock; + private GcsBucketSpecFetcher bucketSpecFetcherMock; @SuppressWarnings("unchecked") @BeforeEach void setup() { defaultClientMock = mock(SynchronousSchedulerClient.class); - bucketSpecFetcherMock = mock(Function.class); + bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); } @Test void testGetsSpecIfPresent() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); assertEquals(new ConnectorSpecification(), client.createGetSpecJob("source-pokeapi:0.1.0").getOutput()); verifyNoInteractions(defaultClientMock); @@ -40,7 +40,7 @@ void testGetsSpecIfPresent() throws IOException { @Test void testCallsDelegateIfNotPresent() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -49,7 +49,7 @@ void testCallsDelegateIfNotPresent() throws IOException { @Test void testCallsDelegateIfException() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -62,7 +62,7 @@ void testCallsDelegateIfException() throws IOException { @Disabled @Test void testGetsSpecFromBucket() throws IOException { - 
when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); // todo (cgardens) - replace with prod bucket. final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, "cg-specs"); final ConnectorSpecification actualSpec = client.createGetSpecJob("source-pokeapi:0.1.0").getOutput(); diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java index 8feceddbda9a4..7b6b96f27efe3 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java @@ -23,6 +23,7 @@ import io.airbyte.db.Database; import io.airbyte.db.ExceptionWrappingDatabase; import io.airbyte.db.instance.jobs.JobsDatabaseSchema; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; @@ -53,8 +54,6 @@ import org.jooq.Field; import org.jooq.InsertValuesStepN; import org.jooq.JSONB; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.Named; import org.jooq.Record; import org.jooq.Result; @@ -76,7 +75,6 @@ public class DefaultJobPersistence implements JobPersistence { .of("pg_toast", "information_schema", "pg_catalog", "import_backup", "pg_internal", "catalog_history"); - private static final JSONFormat DB_JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); protected static final String DEFAULT_SCHEMA = "public"; private static final String BACKUP_SCHEMA = "import_backup"; public static final String DEPLOYMENT_ID_KEY = "deployment_id"; @@ -618,7 +616,7 @@ private Stream exportTable(final String schema, final String tableName .filter(f -> f.getDataType().getTypeName().equals("jsonb")) .map(Field::getName) .collect(Collectors.toSet()); - final JsonNode row = Jsons.deserialize(record.formatJSON(DB_JSON_FORMAT)); + final JsonNode row = Jsons.deserialize(record.formatJSON(JdbcUtils.getDefaultJSONFormat())); // for json fields, deserialize them so they are treated as objects instead of strings. this is to // get around that formatJson doesn't handle deserializing them for us. jsonFieldNames.forEach(jsonFieldName -> ((ObjectNode) row).replace(jsonFieldName, Jsons.deserialize(row.get(jsonFieldName).asText()))); diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java index d784c1f3f51ca..023ffda55e43b 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java @@ -125,7 +125,7 @@ public interface JobPersistence { void writeOutput(long jobId, int attemptNumber, T output) throws IOException; /** - * @param configType - type of config, e.g. sync + * @param configTypes - type of config, e.g. 
sync * @param configId - id of that config * @return lists job in descending order by created_at * @throws IOException - what you do when you IO @@ -196,8 +196,6 @@ public interface JobPersistence { /** * Purges job history while ensuring that the latest saved-state information is maintained. - * - * @throws IOException */ void purgeJobHistory(); diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 57d0dbd262cbc..f68e343973bd1 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,9 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -COPY build/distributions/${APPLICATION}-0*.tar ${APPLICATION}.tar - -RUN tar xf ${APPLICATION}.tar --strip-components=1 +ADD build/distributions/${APPLICATION}-0.30.34-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.34-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle index edc7c55fb2506..a38db0edc5dbe 100644 --- a/airbyte-server/build.gradle +++ b/airbyte-server/build.gradle @@ -66,6 +66,7 @@ dependencies { implementation project(':airbyte-config:init') implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') + implementation project(':airbyte-config:specs') implementation project(':airbyte-db:lib') implementation project(":airbyte-json-validation") implementation project(':airbyte-migration') diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java index 8c3ffed22ab47..b10aeda10272d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java @@ -231,7 +231,7 @@ private Collection listDestinationDefinition(fina } /** - * List all configurations of type @param that already exists + * List all configurations of type @param <T> that already exists */ public interface ListConfigCall { diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java index 9abf855011050..de7ac4084d47f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java @@ -596,8 +596,8 @@ private Map importIntoWorkspace(final ConfigSchema configSchema, } /** - * List all configurations of type @param that already exists (we'll be using this to know which - * ids are already in use) + * List all configurations of type @param <T> that already exists (we'll be using this to know + * which ids are already in use) */ public interface ListConfigCall { diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java index cb1a52c61e2ae..bdc9dd0105f4d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java @@ -6,7 +6,9 @@ import io.airbyte.analytics.TrackingClient; import io.airbyte.commons.io.FileTtlManager; -import io.airbyte.config.Configs; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfigs; import 
io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.db.Database; @@ -15,6 +17,8 @@ import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.net.http.HttpClient; +import java.nio.file.Path; import java.util.Map; import org.glassfish.hk2.api.Factory; import org.slf4j.MDC; @@ -27,12 +31,17 @@ public class ConfigurationApiFactory implements Factory { private static ConfigPersistence seed; private static SchedulerJobClient schedulerJobClient; private static CachingSynchronousSchedulerClient synchronousSchedulerClient; - private static Configs configs; private static FileTtlManager archiveTtlManager; private static Map mdc; private static Database configsDatabase; private static Database jobsDatabase; private static TrackingClient trackingClient; + private static WorkerEnvironment workerEnvironment; + private static LogConfigs logConfigs; + private static Path workspaceRoot; + private static String webappUrl; + private static AirbyteVersion airbyteVersion; + private static HttpClient httpClient; public static void setValues( final WorkflowServiceStubs temporalService, @@ -41,24 +50,34 @@ public static void setValues( final ConfigPersistence seed, final SchedulerJobClient schedulerJobClient, final CachingSynchronousSchedulerClient synchronousSchedulerClient, - final Configs configs, final FileTtlManager archiveTtlManager, final Map mdc, final Database configsDatabase, final Database jobsDatabase, - final TrackingClient trackingClient) { + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String webappUrl, + final AirbyteVersion airbyteVersion, + final Path workspaceRoot, + final HttpClient httpClient) { ConfigurationApiFactory.configRepository = configRepository; ConfigurationApiFactory.jobPersistence = jobPersistence; ConfigurationApiFactory.seed = seed; ConfigurationApiFactory.schedulerJobClient = schedulerJobClient; ConfigurationApiFactory.synchronousSchedulerClient = synchronousSchedulerClient; - ConfigurationApiFactory.configs = configs; ConfigurationApiFactory.archiveTtlManager = archiveTtlManager; ConfigurationApiFactory.mdc = mdc; ConfigurationApiFactory.temporalService = temporalService; ConfigurationApiFactory.configsDatabase = configsDatabase; ConfigurationApiFactory.jobsDatabase = jobsDatabase; ConfigurationApiFactory.trackingClient = trackingClient; + ConfigurationApiFactory.workerEnvironment = workerEnvironment; + ConfigurationApiFactory.logConfigs = logConfigs; + ConfigurationApiFactory.workspaceRoot = workspaceRoot; + ConfigurationApiFactory.webappUrl = webappUrl; + ConfigurationApiFactory.airbyteVersion = airbyteVersion; + ConfigurationApiFactory.httpClient = httpClient; } @Override @@ -71,12 +90,17 @@ public ConfigurationApi provide() { ConfigurationApiFactory.seed, ConfigurationApiFactory.schedulerJobClient, ConfigurationApiFactory.synchronousSchedulerClient, - ConfigurationApiFactory.configs, ConfigurationApiFactory.archiveTtlManager, ConfigurationApiFactory.temporalService, ConfigurationApiFactory.configsDatabase, ConfigurationApiFactory.jobsDatabase, - ConfigurationApiFactory.trackingClient); + ConfigurationApiFactory.trackingClient, + ConfigurationApiFactory.workerEnvironment, + ConfigurationApiFactory.logConfigs, + ConfigurationApiFactory.webappUrl, + ConfigurationApiFactory.airbyteVersion, + 
ConfigurationApiFactory.workspaceRoot, + ConfigurationApiFactory.httpClient); } @Override diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 3c8b1196d90a5..b05fb8b22451f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -4,21 +4,28 @@ package io.airbyte.server; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import io.airbyte.analytics.Deployment; import io.airbyte.analytics.TrackingClient; import io.airbyte.analytics.TrackingClientSingleton; +import io.airbyte.api.model.LogRead; +import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.EnvConfigs; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardWorkspace; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.YamlSeedConfigPersistence; import io.airbyte.config.persistence.split_secrets.SecretPersistence; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.db.Database; @@ -27,17 +34,20 @@ import io.airbyte.db.instance.configs.ConfigsDatabaseMigrator; import io.airbyte.db.instance.jobs.JobsDatabaseInstance; import io.airbyte.db.instance.jobs.JobsDatabaseMigrator; +import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.scheduler.client.BucketSpecCacheSchedulerClient; import io.airbyte.scheduler.client.DefaultSchedulerJobClient; import io.airbyte.scheduler.client.DefaultSynchronousSchedulerClient; import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SpecCachingSynchronousSchedulerClient; +import io.airbyte.scheduler.client.SynchronousResponse; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.persistence.DefaultJobCreator; import io.airbyte.scheduler.persistence.DefaultJobPersistence; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.scheduler.persistence.job_tracker.JobTracker; +import io.airbyte.server.converters.JobConverter; import io.airbyte.server.converters.SpecFetcher; import io.airbyte.server.errors.InvalidInputExceptionMapper; import io.airbyte.server.errors.InvalidJsonExceptionMapper; @@ -51,6 +61,7 @@ import io.airbyte.workers.temporal.TemporalUtils; import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; +import java.net.http.HttpClient; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -157,7 +168,8 @@ private static void createWorkspaceIfNoneExists(final ConfigRepository configRep public static ServerRunnable getServer(final ServerFactory apiFactory, final ConfigPersistence seed) throws Exception { final Configs configs = new EnvConfigs(); - 
LogClientSingleton.setWorkspaceMdc(LogClientSingleton.getServerLogsRoot(configs)); + LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), + LogClientSingleton.getInstance().getServerLogsRoot(configs.getWorkspaceRoot())); LOGGER.info("Creating Staged Resource folder..."); ConfigDumpImporter.initStagedResourceFolder(); @@ -228,12 +240,23 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con jobPersistence, configs); + final HttpClient httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + if (airbyteDatabaseVersion.isPresent() && AirbyteVersion.isCompatible(airbyteVersion, airbyteDatabaseVersion.get())) { LOGGER.info("Starting server..."); runFlywayMigration(configs, configDatabase, jobDatabase); configPersistence.loadData(seed); + // todo (lmossman) - this will only exist temporarily to ensure all definitions contain specs. It + // will be removed after the faux major version bump + migrateAllDefinitionsToContainSpec( + configRepository, + cachingSchedulerClient, + trackingClient, + configs.getWorkerEnvironment(), + configs.getLogConfigs()); + return apiFactory.create( schedulerJobClient, cachingSchedulerClient, @@ -243,14 +266,123 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con seed, configDatabase, jobDatabase, - configs, - trackingClient); + trackingClient, + configs.getWorkerEnvironment(), + configs.getLogConfigs(), + configs.getWebappUrl(), + configs.getAirbyteVersion(), + configs.getWorkspaceRoot(), + httpClient); } else { LOGGER.info("Start serving version mismatch errors. Automatic migration either failed or didn't run"); return new VersionMismatchServer(airbyteVersion, airbyteDatabaseVersion.orElseThrow(), PORT); } } + /** + * Check that each spec in the database has a spec. If it doesn't, add it. If it can't be added, + * track the failure in Segment. The goal is to try to end up in a state where all definitions in + * the db contain specs, and to understand what is stopping us from getting there. + * + * @param configRepository - access to the db + * @param schedulerClient - scheduler client so that specs can be fetched as needed + * @param trackingClient + * @param workerEnvironment + * @param logConfigs + */ + @VisibleForTesting + static void migrateAllDefinitionsToContainSpec(final ConfigRepository configRepository, + final SynchronousSchedulerClient schedulerClient, + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) + throws JsonValidationException, IOException { + final JobConverter jobConverter = new JobConverter(workerEnvironment, logConfigs); + for (final StandardSourceDefinition sourceDef : configRepository.listStandardSourceDefinitions()) { + try { + if (sourceDef.getSpec() == null) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Source Definition {} does not have a spec. Attempting to retrieve spec...", + sourceDef.getName()); + final SynchronousResponse getSpecJob = schedulerClient + .createGetSpecJob(sourceDef.getDockerRepository() + ":" + sourceDef.getDockerImageTag()); + if (getSpecJob.isSuccess()) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Source Definition {} was successfully retrieved. 
Writing to the db...", + sourceDef.getName()); + final StandardSourceDefinition updatedDef = Jsons.clone(sourceDef).withSpec(getSpecJob.getOutput()); + configRepository.writeStandardSourceDefinition(updatedDef); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Source Definition {} was successfully written to the db record.", + sourceDef.getName()); + } else { + final LogRead logRead = jobConverter.getLogRead(getSpecJob.getMetadata().getLogPath()); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Failed to retrieve spec for Source Definition {}. Logs: {}", + sourceDef.getName(), + logRead.toString()); + throw new RuntimeException(String.format( + "Failed to retrieve spec for Source Definition %s. Logs: %s", + sourceDef.getName(), + logRead.toString())); + } + } + } catch (final Exception e) { + trackSpecBackfillFailure(trackingClient, configRepository, sourceDef.getDockerRepository(), sourceDef.getDockerImageTag(), e); + } + } + + for (final StandardDestinationDefinition destDef : configRepository.listStandardDestinationDefinitions()) { + try { + if (destDef.getSpec() == null) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Destination Definition {} does not have a spec. Attempting to retrieve spec...", + destDef.getName()); + final SynchronousResponse getSpecJob = schedulerClient + .createGetSpecJob(destDef.getDockerRepository() + ":" + destDef.getDockerImageTag()); + if (getSpecJob.isSuccess()) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Destination Definition {} was successfully retrieved. Writing to the db...", + destDef.getName()); + final StandardDestinationDefinition updatedDef = Jsons.clone(destDef).withSpec(getSpecJob.getOutput()); + configRepository.writeStandardDestinationDefinition(updatedDef); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Destination Definition {} was successfully written to the db record.", + destDef.getName()); + } else { + final LogRead logRead = jobConverter.getLogRead(getSpecJob.getMetadata().getLogPath()); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Failed to retrieve spec for Destination Definition {}. Logs: {}", + destDef.getName(), + logRead.toString()); + throw new RuntimeException(String.format( + "Failed to retrieve spec for Destination Definition %s. 
Logs: %s", + destDef.getName(), + logRead.toString())); + } + } + } catch (final Exception e) { + trackSpecBackfillFailure(trackingClient, configRepository, destDef.getDockerRepository(), destDef.getDockerImageTag(), e); + } + } + } + + private static void trackSpecBackfillFailure(final TrackingClient trackingClient, + final ConfigRepository configRepository, + final String dockerRepo, + final String dockerImageTag, + final Exception exception) + throws JsonValidationException, IOException { + // There is guaranteed to be at least one workspace, because the getServer() function enforces that + final UUID workspaceId = configRepository.listStandardWorkspaces(true).get(0).getWorkspaceId(); + + final ImmutableMap metadata = ImmutableMap.of( + "docker_image_name", dockerRepo, + "docker_image_tag", dockerImageTag, + "exception", exception); + trackingClient.track(workspaceId, "failed_spec_backfill", metadata); + } + @Deprecated @SuppressWarnings({"DeprecatedIsStillUsed"}) private static Optional runFileMigration(final AirbyteVersion airbyteVersion, diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java index d58f0fce32d81..1bf5528b9809e 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java @@ -6,7 +6,9 @@ import io.airbyte.analytics.TrackingClient; import io.airbyte.commons.io.FileTtlManager; -import io.airbyte.config.Configs; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.db.Database; @@ -15,6 +17,8 @@ import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.net.http.HttpClient; +import java.nio.file.Path; import java.util.Set; import java.util.concurrent.TimeUnit; import org.slf4j.MDC; @@ -29,8 +33,13 @@ ServerRunnable create(SchedulerJobClient schedulerJobClient, ConfigPersistence seed, Database configsDatabase, Database jobsDatabase, - Configs configs, - TrackingClient trackingClient); + TrackingClient trackingClient, + WorkerEnvironment workerEnvironment, + LogConfigs logConfigs, + String webappUrl, + AirbyteVersion airbyteVersion, + Path workspaceRoot, + HttpClient httpClient); class Api implements ServerFactory { @@ -43,8 +52,13 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, final ConfigPersistence seed, final Database configsDatabase, final Database jobsDatabase, - final Configs configs, - final TrackingClient trackingClient) { + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String webappUrl, + final AirbyteVersion airbyteVersion, + final Path workspaceRoot, + final HttpClient httpClient) { // set static values for factory ConfigurationApiFactory.setValues( temporalService, @@ -53,19 +67,24 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, seed, schedulerJobClient, cachingSchedulerClient, - configs, new FileTtlManager(10, TimeUnit.MINUTES, 10), MDC.getCopyOfContextMap(), configsDatabase, jobsDatabase, - trackingClient); + trackingClient, + workerEnvironment, + logConfigs, + webappUrl, + airbyteVersion, + workspaceRoot, + 
httpClient); // server configurations final Set> componentClasses = Set.of(ConfigurationApi.class); final Set components = Set.of(new CorsFilter(), new ConfigurationApiBinder()); // construct server - return new ServerApp(configs.getAirbyteVersion(), componentClasses, components); + return new ServerApp(airbyteVersion, componentClasses, components); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 259bbcf2de559..2820c89b77040 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -81,7 +81,9 @@ import io.airbyte.api.model.WorkspaceReadList; import io.airbyte.api.model.WorkspaceUpdate; import io.airbyte.commons.io.FileTtlManager; -import io.airbyte.config.Configs; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; @@ -118,6 +120,8 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.File; import java.io.IOException; +import java.net.http.HttpClient; +import java.nio.file.Path; import java.util.Map; @javax.ws.rs.Path("/v1") @@ -141,23 +145,34 @@ public class ConfigurationApi implements io.airbyte.api.V1Api { private final OpenApiConfigHandler openApiConfigHandler; private final DbMigrationHandler dbMigrationHandler; private final OAuthHandler oAuthHandler; - private final Configs configs; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final Path workspaceRoot; public ConfigurationApi(final ConfigRepository configRepository, final JobPersistence jobPersistence, final ConfigPersistence seed, final SchedulerJobClient schedulerJobClient, final CachingSynchronousSchedulerClient synchronousSchedulerClient, - final Configs configs, final FileTtlManager archiveTtlManager, final WorkflowServiceStubs temporalService, final Database configsDatabase, final Database jobsDatabase, - final TrackingClient trackingClient) { + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String webappUrl, + final AirbyteVersion airbyteVersion, + final Path workspaceRoot, + HttpClient httpClient) { + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.workspaceRoot = workspaceRoot; + final SpecFetcher specFetcher = new SpecFetcher(synchronousSchedulerClient); final JsonSchemaValidator schemaValidator = new JsonSchemaValidator(); final JobNotifier jobNotifier = new JobNotifier( - configs.getWebappUrl(), + webappUrl, configRepository, new WorkspaceHelper(configRepository, jobPersistence), trackingClient); @@ -168,7 +183,7 @@ public ConfigurationApi(final ConfigRepository configRepository, jobPersistence, jobNotifier, temporalService, - new OAuthConfigSupplier(configRepository, false, trackingClient)); + new OAuthConfigSupplier(configRepository, false, trackingClient), workerEnvironment, logConfigs); final WorkspaceHelper workspaceHelper = new WorkspaceHelper(configRepository, jobPersistence); sourceDefinitionsHandler = new SourceDefinitionsHandler(configRepository, synchronousSchedulerClient); connectionsHandler = new 
ConnectionsHandler(configRepository, workspaceHelper, trackingClient); @@ -177,8 +192,8 @@ public ConfigurationApi(final ConfigRepository configRepository, destinationHandler = new DestinationHandler(configRepository, schemaValidator, specFetcher, connectionsHandler); sourceHandler = new SourceHandler(configRepository, schemaValidator, specFetcher, connectionsHandler); workspacesHandler = new WorkspacesHandler(configRepository, connectionsHandler, destinationHandler, sourceHandler); - jobHistoryHandler = new JobHistoryHandler(jobPersistence); - oAuthHandler = new OAuthHandler(configRepository, trackingClient); + jobHistoryHandler = new JobHistoryHandler(jobPersistence, workerEnvironment, logConfigs); + oAuthHandler = new OAuthHandler(configRepository, httpClient, trackingClient); webBackendConnectionsHandler = new WebBackendConnectionsHandler( connectionsHandler, sourceHandler, @@ -190,7 +205,7 @@ public ConfigurationApi(final ConfigRepository configRepository, webBackendDestinationsHandler = new WebBackendDestinationsHandler(destinationHandler, configRepository, trackingClient); healthCheckHandler = new HealthCheckHandler(configRepository); archiveHandler = new ArchiveHandler( - configs.getAirbyteVersion(), + airbyteVersion, configRepository, jobPersistence, seed, @@ -201,7 +216,6 @@ public ConfigurationApi(final ConfigRepository configRepository, logsHandler = new LogsHandler(); openApiConfigHandler = new OpenApiConfigHandler(); dbMigrationHandler = new DbMigrationHandler(configsDatabase, jobsDatabase); - this.configs = configs; } // WORKSPACE @@ -578,7 +592,7 @@ public JobInfoRead getJobInfo(final JobIdRequestBody jobIdRequestBody) { @Override public File getLogs(final LogsRequestBody logsRequestBody) { - return execute(() -> logsHandler.getLogs(configs, logsRequestBody)); + return execute(() -> logsHandler.getLogs(workspaceRoot, workerEnvironment, logConfigs, logsRequestBody)); } @Override diff --git a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java index 3202edf11b580..9841a41d16eb8 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java @@ -15,11 +15,12 @@ import io.airbyte.api.model.LogRead; import io.airbyte.api.model.SynchronousJobRead; import io.airbyte.commons.enums.Enums; -import io.airbyte.config.EnvConfigs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobOutput; import io.airbyte.config.StandardSyncOutput; import io.airbyte.config.StandardSyncSummary; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.client.SynchronousJobMetadata; import io.airbyte.scheduler.client.SynchronousResponse; import io.airbyte.scheduler.models.Attempt; @@ -32,10 +33,18 @@ public class JobConverter { private static final int LOG_TAIL_SIZE = 1000000; - public static JobInfoRead getJobInfoRead(final Job job) { + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + + public JobConverter(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + } + + public JobInfoRead getJobInfoRead(final Job job) { return new JobInfoRead() .job(getJobWithAttemptsRead(job).getJob()) - 
.attempts(job.getAttempts().stream().map(JobConverter::getAttemptInfoRead).collect(Collectors.toList())); + .attempts(job.getAttempts().stream().map(attempt -> getAttemptInfoRead(attempt)).collect(Collectors.toList())); } public static JobWithAttemptsRead getJobWithAttemptsRead(final Job job) { @@ -50,10 +59,10 @@ public static JobWithAttemptsRead getJobWithAttemptsRead(final Job job) { .createdAt(job.getCreatedAtInSecond()) .updatedAt(job.getUpdatedAtInSecond()) .status(Enums.convertTo(job.getStatus(), JobStatus.class))) - .attempts(job.getAttempts().stream().map(JobConverter::getAttemptRead).collect(Collectors.toList())); + .attempts(job.getAttempts().stream().map(attempt -> getAttemptRead(attempt)).collect(Collectors.toList())); } - public static AttemptInfoRead getAttemptInfoRead(final Attempt attempt) { + public AttemptInfoRead getAttemptInfoRead(final Attempt attempt) { return new AttemptInfoRead() .attempt(getAttemptRead(attempt)) .logs(getLogRead(attempt.getLogPath())); @@ -78,20 +87,19 @@ public static AttemptRead getAttemptRead(final Attempt attempt) { .endedAt(attempt.getEndedAtInSecond().orElse(null)); } - public static LogRead getLogRead(final Path logPath) { + public LogRead getLogRead(final Path logPath) { try { - final var logs = LogClientSingleton.getJobLogFile(new EnvConfigs(), logPath); - return new LogRead().logLines(logs); + return new LogRead().logLines(LogClientSingleton.getInstance().getJobLogFile(workerEnvironment, logConfigs, logPath)); } catch (final IOException e) { throw new RuntimeException(e); } } - public static SynchronousJobRead getSynchronousJobRead(final SynchronousResponse response) { + public SynchronousJobRead getSynchronousJobRead(final SynchronousResponse response) { return getSynchronousJobRead(response.getMetadata()); } - public static SynchronousJobRead getSynchronousJobRead(final SynchronousJobMetadata metadata) { + public SynchronousJobRead getSynchronousJobRead(final SynchronousJobMetadata metadata) { final JobConfigType configType = Enums.convertTo(metadata.getConfigType(), JobConfigType.class); return new SynchronousJobRead() @@ -101,7 +109,7 @@ public static SynchronousJobRead getSynchronousJobRead(final SynchronousJobMetad .createdAt(metadata.getCreatedAt()) .endedAt(metadata.getEndedAt()) .succeeded(metadata.isSucceeded()) - .logs(JobConverter.getLogRead(metadata.getLogPath())); + .logs(getLogRead(metadata.getLogPath())); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index e1b6128f52240..0282c79490391 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -11,8 +11,10 @@ import io.airbyte.api.model.JobReadList; import io.airbyte.api.model.JobWithAttemptsRead; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.converters.JobConverter; @@ -25,8 +27,10 @@ public class JobHistoryHandler { public static final int DEFAULT_PAGE_SIZE = 200; private final JobPersistence jobPersistence; + private final JobConverter jobConverter; - public JobHistoryHandler(final JobPersistence jobPersistence) { + public 
JobHistoryHandler(final JobPersistence jobPersistence, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + jobConverter = new JobConverter(workerEnvironment, logConfigs); this.jobPersistence = jobPersistence; } @@ -47,15 +51,14 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept : DEFAULT_PAGE_SIZE, (request.getPagination() != null && request.getPagination().getRowOffset() != null) ? request.getPagination().getRowOffset() : 0) .stream() - .map(JobConverter::getJobWithAttemptsRead) + .map(attempt -> jobConverter.getJobWithAttemptsRead(attempt)) .collect(Collectors.toList()); return new JobReadList().jobs(jobReads); } public JobInfoRead getJobInfo(final JobIdRequestBody jobIdRequestBody) throws IOException { final Job job = jobPersistence.getJob(jobIdRequestBody.getId()); - - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java index 57fe0650751ae..5d7a86a2cdd71 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java @@ -5,9 +5,11 @@ package io.airbyte.server.handlers; import io.airbyte.api.model.LogsRequestBody; -import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import java.io.File; +import java.nio.file.Path; /** * This handler is only responsible for server and scheduler logs. Jobs logs paths are determined by @@ -15,13 +17,16 @@ */ public class LogsHandler { - public File getLogs(final Configs configs, final LogsRequestBody logsRequestBody) { + public File getLogs(final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final LogsRequestBody logsRequestBody) { switch (logsRequestBody.getLogType()) { case SERVER -> { - return LogClientSingleton.getServerLogFile(configs); + return LogClientSingleton.getInstance().getServerLogFile(workspaceRoot, workerEnvironment, logConfigs); } case SCHEDULER -> { - return LogClientSingleton.getSchedulerLogFile(configs); + return LogClientSingleton.getInstance().getSchedulerLogFile(workspaceRoot, workerEnvironment, logConfigs); } default -> throw new IllegalStateException("Unexpected value: " + logsRequestBody.getLogType()); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java index 7fa5565bf7765..2d8e0a4b2b54d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java @@ -25,6 +25,7 @@ import io.airbyte.scheduler.persistence.job_tracker.TrackingMetadata; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.util.Map; import java.util.UUID; import org.slf4j.Logger; @@ -38,9 +39,9 @@ public class OAuthHandler { private final OAuthImplementationFactory oAuthImplementationFactory; private final TrackingClient trackingClient; - public OAuthHandler(final ConfigRepository configRepository, final TrackingClient trackingClient) { + public OAuthHandler(final ConfigRepository configRepository, final HttpClient httpClient, final 
TrackingClient trackingClient) { this.configRepository = configRepository; - this.oAuthImplementationFactory = new OAuthImplementationFactory(configRepository); + this.oAuthImplementationFactory = new OAuthImplementationFactory(configRepository, httpClient); this.trackingClient = trackingClient; } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java index 2a0a0a8cb468e..378a793282f22 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java @@ -28,6 +28,7 @@ import io.airbyte.api.model.SourceUpdate; import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; @@ -36,6 +37,7 @@ import io.airbyte.config.StandardSync; import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.State; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.protocol.models.AirbyteCatalog; @@ -79,6 +81,9 @@ public class SchedulerHandler { private final JobNotifier jobNotifier; private final WorkflowServiceStubs temporalService; private final OAuthConfigSupplier oAuthConfigSupplier; + private final JobConverter jobConverter; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; public SchedulerHandler(final ConfigRepository configRepository, final SchedulerJobClient schedulerJobClient, @@ -86,7 +91,9 @@ public SchedulerHandler(final ConfigRepository configRepository, final JobPersistence jobPersistence, final JobNotifier jobNotifier, final WorkflowServiceStubs temporalService, - final OAuthConfigSupplier oAuthConfigSupplier) { + final OAuthConfigSupplier oAuthConfigSupplier, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this( configRepository, schedulerJobClient, @@ -97,7 +104,9 @@ public SchedulerHandler(final ConfigRepository configRepository, jobPersistence, jobNotifier, temporalService, - oAuthConfigSupplier); + oAuthConfigSupplier, + workerEnvironment, + logConfigs); } @VisibleForTesting @@ -110,7 +119,9 @@ public SchedulerHandler(final ConfigRepository configRepository, final JobPersistence jobPersistence, final JobNotifier jobNotifier, final WorkflowServiceStubs temporalService, - final OAuthConfigSupplier oAuthConfigSupplier) { + final OAuthConfigSupplier oAuthConfigSupplier, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this.configRepository = configRepository; this.schedulerJobClient = schedulerJobClient; this.synchronousSchedulerClient = synchronousSchedulerClient; @@ -121,6 +132,9 @@ public SchedulerHandler(final ConfigRepository configRepository, this.jobNotifier = jobNotifier; this.temporalService = temporalService; this.oAuthConfigSupplier = oAuthConfigSupplier; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.jobConverter = new JobConverter(workerEnvironment, logConfigs); } public CheckConnectionRead checkSourceConnectionFromSourceId(final SourceIdRequestBody sourceIdRequestBody) @@ -226,9 +240,9 @@ public SourceDiscoverSchemaRead discoverSchemaForSourceFromSourceCreate(final 
So return discoverJobToOutput(response); } - private static SourceDiscoverSchemaRead discoverJobToOutput(final SynchronousResponse response) { + private SourceDiscoverSchemaRead discoverJobToOutput(final SynchronousResponse response) { final SourceDiscoverSchemaRead sourceDiscoverSchemaRead = new SourceDiscoverSchemaRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)); + .jobInfo(jobConverter.getSynchronousJobRead(response)); if (response.isSuccess()) { sourceDiscoverSchemaRead.catalog(CatalogConverter.toApi(response.getOutput())); @@ -244,7 +258,7 @@ public SourceDefinitionSpecificationRead getSourceDefinitionSpecification(final final SynchronousResponse response = specFetcher.getSpecJobResponse(source); final ConnectorSpecification spec = response.getOutput(); final SourceDefinitionSpecificationRead specRead = new SourceDefinitionSpecificationRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)) + .jobInfo(jobConverter.getSynchronousJobRead(response)) .connectionSpecification(spec.getConnectionSpecification()) .documentationUrl(spec.getDocumentationUrl().toString()) .sourceDefinitionId(sourceDefinitionId); @@ -257,7 +271,8 @@ public SourceDefinitionSpecificationRead getSourceDefinitionSpecification(final return specRead; } - public DestinationDefinitionSpecificationRead getDestinationSpecification(final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) + public DestinationDefinitionSpecificationRead getDestinationSpecification( + final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) throws ConfigNotFoundException, IOException, JsonValidationException { final UUID destinationDefinitionId = destinationDefinitionIdRequestBody.getDestinationDefinitionId(); final StandardDestinationDefinition destination = configRepository.getStandardDestinationDefinition(destinationDefinitionId); @@ -265,7 +280,7 @@ public DestinationDefinitionSpecificationRead getDestinationSpecification(final final ConnectorSpecification spec = response.getOutput(); final DestinationDefinitionSpecificationRead specRead = new DestinationDefinitionSpecificationRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)) + .jobInfo(jobConverter.getSynchronousJobRead(response)) .supportedDestinationSyncModes(Enums.convertListTo(spec.getSupportedDestinationSyncModes(), DestinationSyncMode.class)) .connectionSpecification(spec.getConnectionSpecification()) .documentationUrl(spec.getDocumentationUrl().toString()) @@ -320,7 +335,7 @@ public JobInfoRead syncConnection(final ConnectionIdRequestBody connectionIdRequ destinationImageName, standardSyncOperations); - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdRequestBody) @@ -341,7 +356,7 @@ public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdReq final Job job = schedulerJobClient.createOrGetActiveResetConnectionJob(destination, standardSync, destinationImageName, standardSyncOperations); - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } public ConnectionState getState(final ConnectionIdRequestBody connectionIdRequestBody) throws IOException { @@ -366,7 +381,7 @@ public JobInfoRead cancelJob(final JobIdRequestBody jobIdRequestBody) throws IOE final Job job = jobPersistence.getJob(jobId); jobNotifier.failJob("job was cancelled", job); - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } private void 
cancelTemporalWorkflowIfPresent(final long jobId) throws IOException { @@ -390,7 +405,7 @@ private void cancelTemporalWorkflowIfPresent(final long jobId) throws IOExceptio private CheckConnectionRead reportConnectionStatus(final SynchronousResponse response) { final CheckConnectionRead checkConnectionRead = new CheckConnectionRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)); + .jobInfo(jobConverter.getSynchronousJobRead(response)); if (response.isSuccess()) { checkConnectionRead diff --git a/airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java b/airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java new file mode 100644 index 0000000000000..541eb3c4a6d31 --- /dev/null +++ b/airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.server; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.airbyte.analytics.TrackingClient; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.JobConfig.ConfigType; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.config.StandardSourceDefinition; +import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.scheduler.client.SynchronousJobMetadata; +import io.airbyte.scheduler.client.SynchronousResponse; +import io.airbyte.scheduler.client.SynchronousSchedulerClient; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class BackfillSpecTest { + + private static final String SOURCE_DOCKER_REPO = "docker-repo/source"; + private static final String DEST_DOCKER_REPO = "docker-repo/destination"; + private static final String DOCKER_IMAGE_TAG = "tag"; + private static final String FAILED_SPEC_BACKFILL_ACTION = "failed_spec_backfill"; + private static final StandardWorkspace WORKSPACE = new StandardWorkspace().withWorkspaceId(UUID.randomUUID()); + + private ConfigRepository configRepository; + private TrackingClient trackingClient; + private SynchronousSchedulerClient schedulerClient; + + @BeforeEach + void setup() throws IOException, JsonValidationException { + configRepository = mock(ConfigRepository.class); + when(configRepository.listStandardWorkspaces(true)).thenReturn(List.of(WORKSPACE)); + + trackingClient = mock(TrackingClient.class); + schedulerClient = mock(SynchronousSchedulerClient.class); + } + + @Test + public void testBackfillSpecSuccessful() throws JsonValidationException, IOException { + final StandardSourceDefinition sourceDef = new StandardSourceDefinition().withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + final StandardDestinationDefinition destDef = new StandardDestinationDefinition().withDockerRepository(DEST_DOCKER_REPO) + 
.withDockerImageTag(DOCKER_IMAGE_TAG); + + when(configRepository.listStandardSourceDefinitions()).thenReturn(List.of(sourceDef)); + when(configRepository.listStandardDestinationDefinitions()).thenReturn(List.of(destDef)); + + final ConnectorSpecification sourceSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://source.org")); + final ConnectorSpecification destSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://dest.org")); + + final SynchronousResponse successfulSourceResponse = new SynchronousResponse<>( + sourceSpec, + mockJobMetadata(true)); + final SynchronousResponse successfulDestResponse = new SynchronousResponse<>( + destSpec, + mockJobMetadata(true)); + + final SynchronousSchedulerClient schedulerClient = mock(SynchronousSchedulerClient.class); + when(schedulerClient.createGetSpecJob(SOURCE_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(successfulSourceResponse); + when(schedulerClient.createGetSpecJob(DEST_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(successfulDestResponse); + + ServerApp.migrateAllDefinitionsToContainSpec(configRepository, schedulerClient, trackingClient, WorkerEnvironment.DOCKER, mock(LogConfigs.class)); + + final StandardSourceDefinition expectedSourceDef = Jsons.clone(sourceDef).withSpec(sourceSpec); + final StandardDestinationDefinition expectedDestDef = Jsons.clone(destDef).withSpec(destSpec); + verify(configRepository, times(1)).writeStandardSourceDefinition(expectedSourceDef); + verify(configRepository, times(1)).writeStandardDestinationDefinition(expectedDestDef); + } + + @Test + public void testBackfillSpecFailure() throws JsonValidationException, IOException { + final StandardSourceDefinition sourceDef = new StandardSourceDefinition().withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + final StandardDestinationDefinition destDef = new StandardDestinationDefinition().withDockerRepository(DEST_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + + when(configRepository.listStandardSourceDefinitions()).thenReturn(List.of(sourceDef)); + when(configRepository.listStandardDestinationDefinitions()).thenReturn(List.of(destDef)); + + final ConnectorSpecification sourceSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://source.org")); + final ConnectorSpecification destSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://dest.org")); + + final SynchronousResponse failureSourceResponse = new SynchronousResponse<>( + sourceSpec, + mockJobMetadata(false)); + final SynchronousResponse failureDestResponse = new SynchronousResponse<>( + destSpec, + mockJobMetadata(false)); + + when(schedulerClient.createGetSpecJob(SOURCE_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(failureSourceResponse); + when(schedulerClient.createGetSpecJob(DEST_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(failureDestResponse); + + ServerApp.migrateAllDefinitionsToContainSpec(configRepository, schedulerClient, trackingClient, WorkerEnvironment.DOCKER, mock(LogConfigs.class)); + + verify(configRepository, never()).writeStandardSourceDefinition(any()); + verify(configRepository, never()).writeStandardDestinationDefinition(any()); + + verify(trackingClient, times(2)).track(eq(WORKSPACE.getWorkspaceId()), eq(FAILED_SPEC_BACKFILL_ACTION), anyMap()); + } + + @Test + public void testSpecAlreadyExists() throws JsonValidationException, IOException { + final ConnectorSpecification sourceSpec = new 
ConnectorSpecification().withDocumentationUrl(URI.create("http://source.org")); + final ConnectorSpecification destSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://dest.org")); + final StandardSourceDefinition sourceDef = new StandardSourceDefinition().withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG).withSpec(sourceSpec); + final StandardDestinationDefinition destDef = new StandardDestinationDefinition().withDockerRepository(DEST_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG).withSpec(destSpec); + + when(configRepository.listStandardSourceDefinitions()).thenReturn(List.of(sourceDef)); + when(configRepository.listStandardDestinationDefinitions()).thenReturn(List.of(destDef)); + + ServerApp.migrateAllDefinitionsToContainSpec( + configRepository, + mock(SynchronousSchedulerClient.class), + trackingClient, + WorkerEnvironment.DOCKER, + mock(LogConfigs.class)); + + verify(schedulerClient, never()).createGetSpecJob(any()); + verify(configRepository, never()).writeStandardSourceDefinition(any()); + verify(configRepository, never()).writeStandardDestinationDefinition(any()); + } + + private SynchronousJobMetadata mockJobMetadata(final boolean succeeded) { + final long now = Instant.now().toEpochMilli(); + return new SynchronousJobMetadata( + UUID.randomUUID(), + ConfigType.GET_SPEC, + UUID.randomUUID(), + now, + now, + succeeded, + Path.of("path", "to", "logs")); + } + +} diff --git a/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java b/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java index 7ac21ccd94f0a..acd6109ac75b4 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java @@ -5,7 +5,9 @@ package io.airbyte.server; import io.airbyte.commons.io.IOs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Files; @@ -92,7 +94,9 @@ public void test(final String inputByteBuffer, final String contentType, final i // set up the mdc so that actually log to a file, so that we can verify that file logging captures // threads. final Path jobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(jobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, + LogConfiguration.EMPTY, + jobRoot); // We have to instanciate the logger here, because the MDC config has been changed to log in a // temporary file. 
diff --git a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java index cdb7b8c07d030..b71d05eda52de 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java @@ -12,6 +12,8 @@ import io.airbyte.commons.io.FileTtlManager; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.db.Database; @@ -19,6 +21,8 @@ import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.persistence.JobPersistence; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.net.http.HttpClient; +import java.nio.file.Path; import org.junit.jupiter.api.Test; public class ConfigurationApiTest { @@ -35,12 +39,17 @@ void testImportDefinitions() { mock(ConfigPersistence.class), mock(SchedulerJobClient.class), mock(CachingSynchronousSchedulerClient.class), - configs, mock(FileTtlManager.class), mock(WorkflowServiceStubs.class), mock(Database.class), mock(Database.class), - mock(TrackingClient.class)); + mock(TrackingClient.class), + WorkerEnvironment.DOCKER, + LogConfiguration.EMPTY, + "http://localhost", + new AirbyteVersion("0.1.0-alpha"), + Path.of(""), + mock(HttpClient.class)); assertTrue(configurationApi.canImportDefinitons()); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java index 619c43cec3a82..6dcd9e5b3865d 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java @@ -18,8 +18,10 @@ import io.airbyte.api.model.JobWithAttemptsRead; import io.airbyte.api.model.LogRead; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobCheckConnectionConfig; import io.airbyte.config.JobConfig; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; @@ -45,6 +47,7 @@ class JobConverterTest { private static final Path LOG_PATH = Path.of("log_path"); private static final long CREATED_AT = System.currentTimeMillis() / 1000; + private JobConverter jobConverter; private Job job; private static final JobInfoRead JOB_INFO = @@ -71,6 +74,7 @@ class JobConverterTest { @BeforeEach public void setUp() { + jobConverter = new JobConverter(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY); job = mock(Job.class); final Attempt attempt = mock(Attempt.class); when(job.getId()).thenReturn(JOB_ID); @@ -91,17 +95,17 @@ public void setUp() { @Test public void testGetJobInfoRead() { - assertEquals(JOB_INFO, JobConverter.getJobInfoRead(job)); + assertEquals(JOB_INFO, jobConverter.getJobInfoRead(job)); } @Test public void testGetJobWithAttemptsRead() { - assertEquals(JOB_WITH_ATTEMPTS_READ, JobConverter.getJobWithAttemptsRead(job)); + assertEquals(JOB_WITH_ATTEMPTS_READ, jobConverter.getJobWithAttemptsRead(job)); } @Test public void testGetJobRead() { - final JobWithAttemptsRead jobReadActual = 
JobConverter.getJobWithAttemptsRead(job); + final JobWithAttemptsRead jobReadActual = jobConverter.getJobWithAttemptsRead(job); assertEquals(JOB_WITH_ATTEMPTS_READ, jobReadActual); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java index 8a91172c99936..396820fed21d3 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java @@ -27,10 +27,10 @@ import io.airbyte.config.StandardDestinationDefinition; import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.YamlSeedConfigPersistence; import io.airbyte.config.persistence.split_secrets.NoOpSecretsHydrator; import io.airbyte.db.Database; import io.airbyte.db.instance.test.TestDatabaseProviders; @@ -319,13 +319,16 @@ private void assertSameConfigDump(final Map> expected, final Set expectedRecords = expected.get(stream).collect(Collectors.toSet()); final Set actualRecords = actual.get(stream).collect(Collectors.toSet()); for (final var expectedRecord : expectedRecords) { - assertTrue(actualRecords.contains(expectedRecord), - String.format("\n Expected record was not found:\n%s\n Actual records were:\n%s\n", + assertTrue( + actualRecords.contains(expectedRecord), + String.format( + "\n Expected record was not found:\n%s\n Actual records were:\n%s\n", expectedRecord, Strings.join(actualRecords, "\n"))); } assertEquals(expectedRecords.size(), actualRecords.size(), - String.format("The expected vs actual records does not match:\n expected records:\n%s\n actual records\n%s\n", + String.format( + "The expected vs actual records does not match:\n expected records:\n%s\n actual records\n%s\n", Strings.join(expectedRecords, "\n"), Strings.join(actualRecords, "\n"))); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java index 44d8a102fc0e8..bb3ed756cbf1e 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java @@ -22,9 +22,11 @@ import io.airbyte.api.model.LogRead; import io.airbyte.api.model.Pagination; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobCheckConnectionConfig; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; @@ -101,7 +103,7 @@ public void setUp() { CREATED_AT); jobPersistence = mock(JobPersistence.class); - jobHistoryHandler = new JobHistoryHandler(jobPersistence); + jobHistoryHandler = new JobHistoryHandler(jobPersistence, WorkerEnvironment.DOCKER, LogConfiguration.EMPTY); } @Nested diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java index 
e197c454e0c54..425e454273ad8 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java @@ -13,6 +13,7 @@ import io.airbyte.config.Configs; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import java.io.File; import java.nio.file.Path; import org.junit.jupiter.api.Test; @@ -24,9 +25,11 @@ public void testServerLogs() { final Configs configs = mock(Configs.class); when(configs.getWorkspaceRoot()).thenReturn(Path.of("/workspace")); when(configs.getWorkerEnvironment()).thenReturn(WorkerEnvironment.DOCKER); + when(configs.getLogConfigs()).thenReturn(LogConfiguration.EMPTY); final File expected = Path.of(String.format("/workspace/server/logs/%s", LogClientSingleton.LOG_FILENAME)).toFile(); - final File actual = new LogsHandler().getLogs(configs, new LogsRequestBody().logType(LogType.SERVER)); + final File actual = new LogsHandler().getLogs(configs.getWorkspaceRoot(), configs.getWorkerEnvironment(), + configs.getLogConfigs(), new LogsRequestBody().logType(LogType.SERVER)); assertEquals(expected, actual); } @@ -36,9 +39,11 @@ public void testSchedulerLogs() { final Configs configs = mock(Configs.class); when(configs.getWorkspaceRoot()).thenReturn(Path.of("/workspace")); when(configs.getWorkerEnvironment()).thenReturn(WorkerEnvironment.DOCKER); + when(configs.getLogConfigs()).thenReturn(LogConfiguration.EMPTY); final File expected = Path.of(String.format("/workspace/scheduler/logs/%s", LogClientSingleton.LOG_FILENAME)).toFile(); - final File actual = new LogsHandler().getLogs(configs, new LogsRequestBody().logType(LogType.SCHEDULER)); + final File actual = new LogsHandler().getLogs(configs.getWorkspaceRoot(), configs.getWorkerEnvironment(), + configs.getLogConfigs(), new LogsRequestBody().logType(LogType.SCHEDULER)); assertEquals(expected, actual); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java index 3ca87acd7cb05..67c74aea83179 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java @@ -17,6 +17,7 @@ import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -32,12 +33,14 @@ class OAuthHandlerTest { ConfigRepository configRepository; OAuthHandler handler; TrackingClient trackingClient; + private HttpClient httpClient; @BeforeEach public void init() { configRepository = Mockito.mock(ConfigRepository.class); trackingClient = mock(TrackingClient.class); - handler = new OAuthHandler(configRepository, trackingClient); + httpClient = Mockito.mock(HttpClient.class); + handler = new OAuthHandler(configRepository, httpClient, trackingClient); } @Test diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java index ca82d1c17d380..0eabedfff9b3a 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java @@ -35,6 +35,7 @@ 
import io.airbyte.commons.enums.Enums; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; @@ -48,6 +49,7 @@ import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.StandardSyncOperation.OperatorType; import io.airbyte.config.State; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.protocol.models.AirbyteCatalog; @@ -153,7 +155,9 @@ void setup() { jobPersistence, jobNotifier, mock(WorkflowServiceStubs.class), - mock(OAuthConfigSupplier.class)); + mock(OAuthConfigSupplier.class), + WorkerEnvironment.DOCKER, + LogConfiguration.EMPTY); } @Test diff --git a/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java b/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java index 017eab48404af..962b4509fec7e 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java @@ -27,11 +27,11 @@ import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.StandardSyncOperation.OperatorType; import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.YamlSeedConfigPersistence; import io.airbyte.config.persistence.split_secrets.MemorySecretPersistence; import io.airbyte.config.persistence.split_secrets.NoOpSecretsHydrator; import io.airbyte.config.persistence.split_secrets.SecretPersistence; diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index 24c73947f793e..8f8584934a956 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -1102,6 +1102,7 @@ private Map localConfig(final PostgreSQLContainer psql, final bo dbConfig.put("port", psql.getFirstMappedPort()); dbConfig.put("database", psql.getDatabaseName()); dbConfig.put("username", psql.getUsername()); + dbConfig.put("ssl", false); if (withSchema) { dbConfig.put("schema", "public"); diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 4ec7dbde8ff68..f5419d28e80ea 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.25-alpha", + "version": "0.30.34-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 56d5644fdbfda..c581ef0e01f70 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.25-alpha", + "version": "0.30.34-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git 
a/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx b/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx index 7ec8173eee198..110b1243989c9 100644 --- a/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx +++ b/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx @@ -29,7 +29,7 @@ export type AuthConfirmPasswordReset = ( export type AuthLogin = (values: { email: string; password: string; -}) => Promise; +}) => Promise; export type AuthSignUp = (form: { email: string; @@ -77,7 +77,7 @@ export const AuthenticationProvider: React.FC = ({ children }) => { const authService = useMemo(() => new GoogleAuthService(() => auth), [auth]); useEffect(() => { - auth.onAuthStateChanged(async (currentUser) => { + return auth.onAuthStateChanged(async (currentUser) => { if (state.currentUser === null && currentUser) { // token = await currentUser.getIdToken(); @@ -115,13 +115,8 @@ export const AuthenticationProvider: React.FC = ({ children }) => { inited: state.inited, isLoading: state.loading, emailVerified: state.emailVerified, - async login(values: { - email: string; - password: string; - }): Promise { + async login(values: { email: string; password: string }): Promise { await authService.login(values.email, values.password); - - return null; }, async logout(): Promise { await authService.signOut(); diff --git a/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx b/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx index badbdf965ad19..98045be799280 100644 --- a/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx +++ b/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx @@ -36,12 +36,12 @@ const LoginPage: React.FC = () => { password: "", }} validationSchema={LoginPageValidationSchema} - onSubmit={async (values, { setFieldError, setStatus }) => + onSubmit={async (values, { setFieldError }) => login(values).catch((err) => { if (err instanceof FieldError) { setFieldError(err.field, err.message); } else { - setStatus(err.message); + setFieldError("password", err.message); } }) } diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 513bfc194030d..0f093dd81ec88 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -1,7 +1,7 @@ ARG JDK_VERSION=14.0.2 FROM openjdk:${JDK_VERSION}-slim AS worker -ARG ARCH=amd64 +ARG DOCKER_BUILD_ARCH=amd64 # Install Docker to launch worker images. Eventually should be replaced with Docker-java. 
# See https://gitter.im/docker-java/docker-java?at=5f3eb87ba8c1780176603f4e for more information on why we are not currently using Docker-java @@ -13,7 +13,7 @@ RUN apt-get update && apt-get install -y \ software-properties-common RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - RUN add-apt-repository \ - "deb [arch=${ARCH}] https://download.docker.com/linux/debian \ + "deb [arch=${DOCKER_BUILD_ARCH}] https://download.docker.com/linux/debian \ $(lsb_release -cs) \ stable" RUN apt-get update && apt-get install -y docker-ce-cli jq @@ -22,15 +22,8 @@ ENV APPLICATION airbyte-workers WORKDIR /app -# Install kubectl -RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.17.14/bin/linux/${ARCH}/kubectl -RUN chmod +x ./kubectl -RUN mv ./kubectl /usr/local/bin - -# Move and run worker -COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar - -RUN tar xf ${APPLICATION}.tar --strip-components=1 +# Move worker app +ADD build/distributions/${APPLICATION}-0.30.34-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.34-alpha/bin/${APPLICATION}"] diff --git a/airbyte-workers/README.md b/airbyte-workers/README.md new file mode 100644 index 0000000000000..2195b1864819e --- /dev/null +++ b/airbyte-workers/README.md @@ -0,0 +1,45 @@ +# Temporal Development + +## Versioning + +Temporal maintains an internal history of the activities it runs. This history is based on a specific order. If we restart a Temporal workflow with +a new implementation that has a different order, the workflow will be stuck and will need manual action to be properly restarted. Temporal provides +an API to manage those changes smoothly. However, Temporal is very permissive with version rules. Airbyte will apply +the following rules: + +- There will be one global version per workflow, meaning that we will use a single tag per workflow. +- All of the following code modifications will need to bump the version number; this is not limited to the release of a new Airbyte version: + - Addition of an activity + - Deletion of an activity + - Change of the input of an activity + - Addition of a Temporal sleep timer + +The way to use this version should be the following: + +If no prior version usage is present: + +``` +final int version = Workflow.getVersion(VERSION_LABEL, MINIMAL_VERSION, CURRENT_VERSION); + +if (version >= CURRENT_VERSION) { + // New implementation +} +``` + +If some prior version usage is present (we bump the version from 4 to 5 in this example): + +``` +final int version = Workflow.getVersion(VERSION_LABEL, MINIMAL_VERSION, CURRENT_VERSION); + +if (version <= 4 && version >= MINIMAL_VERSION) { + // old implementation +} else if (version >= CURRENT_VERSION) { + // New implementation +} +``` + +## Removing a version + +Removing a version is a potential breaking change and should be done very carefully. We should maintain a MINIMAL_VERSION to keep track of the +current minimal version. Both MINIMAL_VERSION and CURRENT_VERSION need to be present in the workflow file even if they are unused (if they have been +used once). 
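To make the "Removing a version" guidance concrete, here is a minimal sketch of what the constants might look like once the pre-version-5 branch has been removed; the label and the numeric values are hypothetical and are not taken from this PR:

```
// Hypothetical constants: after deleting the "version <= 4" branch shown above,
// MINIMAL_VERSION is raised to match CURRENT_VERSION, but both stay declared.
private static final String VERSION_LABEL = "sync-workflow";
private static final int MINIMAL_VERSION = 5;
private static final int CURRENT_VERSION = 5;

final int version = Workflow.getVersion(VERSION_LABEL, MINIMAL_VERSION, CURRENT_VERSION);
if (version >= CURRENT_VERSION) {
  // Only the new implementation remains; workflows recorded with older versions
  // must have completed (or been reset) before this change ships.
}
```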
diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java b/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java index 249d5eae4a8c9..a7c37fb05250f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java @@ -31,10 +31,9 @@ public class DbtTransformationRunner implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(DbtTransformationRunner.class); private static final String DBT_ENTRYPOINT_SH = "entrypoint.sh"; - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("dbt") - .setPrefixColor(Color.CYAN) - .build(); + .setPrefixColor(Color.CYAN); private final ProcessFactory processFactory; private final NormalizationRunner normalizationRunner; @@ -93,9 +92,8 @@ public boolean transform(final String jobId, processFactory.create(jobId, attempt, jobRoot, dbtConfig.getDockerImage(), false, files, "/bin/bash", resourceRequirements, Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_JOB, KubeProcessFactory.SYNC_STEP, KubeProcessFactory.CUSTOM_STEP), dbtArguments); - - LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC); - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC); + LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC_BUILDER); + LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC_BUILDER); WorkerUtils.wait(process); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index 8cfbd7ecb4410..1e9ee445d51f2 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ -9,6 +9,7 @@ import io.airbyte.config.EnvConfigs; import io.airbyte.config.MaxWorkersConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; @@ -57,24 +58,40 @@ public class WorkerApp { private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; private final WorkflowServiceStubs temporalService; + private final ConfigRepository configRepository; private final MaxWorkersConfig maxWorkers; private final WorkerEnvironment workerEnvironment; - private final ConfigRepository configRepository; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; public WorkerApp(final Path workspaceRoot, final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final WorkflowServiceStubs temporalService, final MaxWorkersConfig maxWorkers, + final ConfigRepository configRepository, final WorkerEnvironment workerEnvironment, - final ConfigRepository configRepository) { + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this.workspaceRoot = workspaceRoot; this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; 
this.temporalService = temporalService; this.maxWorkers = maxWorkers; - this.workerEnvironment = workerEnvironment; this.configRepository = configRepository; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } public void start() { @@ -93,25 +110,34 @@ public void start() { final Worker specWorker = factory.newWorker(TemporalJobType.GET_SPEC.name(), getWorkerOptions(maxWorkers.getMaxSpecWorkers())); specWorker.registerWorkflowImplementationTypes(SpecWorkflowImpl.class); - specWorker.registerActivitiesImplementations(new SpecActivityImpl(processFactory, workspaceRoot)); + specWorker.registerActivitiesImplementations( + new SpecActivityImpl(processFactory, workspaceRoot, workerEnvironment, logConfigs, databaseUser, databasePassword, databaseUrl, + airbyteVersion)); final Worker checkConnectionWorker = factory.newWorker(TemporalJobType.CHECK_CONNECTION.name(), getWorkerOptions(maxWorkers.getMaxCheckWorkers())); checkConnectionWorker.registerWorkflowImplementationTypes(CheckConnectionWorkflowImpl.class); checkConnectionWorker - .registerActivitiesImplementations(new CheckConnectionActivityImpl(processFactory, secretsHydrator, workspaceRoot)); + .registerActivitiesImplementations( + new CheckConnectionActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion)); final Worker discoverWorker = factory.newWorker(TemporalJobType.DISCOVER_SCHEMA.name(), getWorkerOptions(maxWorkers.getMaxDiscoverWorkers())); discoverWorker.registerWorkflowImplementationTypes(DiscoverCatalogWorkflowImpl.class); discoverWorker - .registerActivitiesImplementations(new DiscoverCatalogActivityImpl(processFactory, secretsHydrator, workspaceRoot)); + .registerActivitiesImplementations( + new DiscoverCatalogActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion)); final Worker syncWorker = factory.newWorker(TemporalJobType.SYNC.name(), getWorkerOptions(maxWorkers.getMaxSyncWorkers())); syncWorker.registerWorkflowImplementationTypes(SyncWorkflow.WorkflowImpl.class); syncWorker.registerActivitiesImplementations( - new SyncWorkflow.ReplicationActivityImpl(processFactory, secretsHydrator, workspaceRoot), - new SyncWorkflow.NormalizationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment), - new SyncWorkflow.DbtTransformationActivityImpl(processFactory, secretsHydrator, workspaceRoot), + new SyncWorkflow.ReplicationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion), + new SyncWorkflow.NormalizationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion), + new SyncWorkflow.DbtTransformationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion), new SyncWorkflow.PersistStateActivityImpl(workspaceRoot, configRepository)); factory.start(); } @@ -142,7 +168,8 @@ private static WorkerOptions getWorkerOptions(final int max) { public static void main(final String[] args) throws IOException, InterruptedException { final Configs configs = new 
EnvConfigs(); - LogClientSingleton.setWorkspaceMdc(LogClientSingleton.getSchedulerLogsRoot(configs)); + LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), + LogClientSingleton.getInstance().getSchedulerLogsRoot(configs.getWorkspaceRoot())); final Path workspaceRoot = configs.getWorkspaceRoot(); LOGGER.info("workspaceRoot = " + workspaceRoot); @@ -172,8 +199,13 @@ public static void main(final String[] args) throws IOException, InterruptedExce secretsHydrator, temporalService, configs.getMaxWorkers(), + configRepository, configs.getWorkerEnvironment(), - configRepository).start(); + configs.getLogConfigs(), + configs.getDatabaseUser(), + configs.getDatabasePassword(), + configs.getDatabaseUrl(), + configs.getAirbyteVersionOrWarning()).start(); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java index 39cbc52562dcc..3a287286c01b7 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java @@ -30,10 +30,9 @@ public class DefaultNormalizationRunner implements NormalizationRunner { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultNormalizationRunner.class); - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("normalization") - .setPrefixColor(Color.GREEN) - .build(); + .setPrefixColor(Color.GREEN); private final DestinationType destinationType; private final ProcessFactory processFactory; @@ -116,8 +115,8 @@ private boolean runProcess(final String jobId, process = processFactory.create(jobId, attempt, jobRoot, normalizationImageName, false, files, null, resourceRequirements, Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_JOB, KubeProcessFactory.SYNC_STEP, KubeProcessFactory.NORMALISE_STEP), args); - LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC); - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC); + LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC_BUILDER); + LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC_BUILDER); WorkerUtils.wait(process); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index d24e781f2a490..3937cf80d8d0a 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -58,6 +58,7 @@ * stderr streams and copy configuration files over. * * This is made possible by: + *
    *
 * 1) An init container that creates 3 named pipes corresponding to stdin, stdout and std err on * a shared volume.
 *
 * 2) Config files (e.g. config.json, catalog.json etc) are copied from the parent process into @@ -76,7 +77,7 @@ * handling.
 *
 * 8) A heartbeat sidecar checks if the worker that launched the pod is still alive. If not, the * pod will fail.
 - * + *
* The docker image used for this pod process must expose a AIRBYTE_ENTRYPOINT which contains the * entrypoint we will wrap when creating the main container in the pod. * @@ -325,21 +326,21 @@ public KubePodProcess(final String processRunnerHost, final Container remoteStdin = new ContainerBuilder() .withName("remote-stdin") .withImage("alpine/socat:1.7.4.1-r1") - .withCommand("sh", "-c", "socat -d -d -d TCP-L:9001 STDOUT > " + STDIN_PIPE_FILE) + .withCommand("sh", "-c", "socat -d TCP-L:9001 STDOUT > " + STDIN_PIPE_FILE) .withVolumeMounts(pipeVolumeMount, terminationVolumeMount) .build(); final Container relayStdout = new ContainerBuilder() .withName("relay-stdout") .withImage("alpine/socat:1.7.4.1-r1") - .withCommand("sh", "-c", String.format("cat %s | socat -d -d -d - TCP:%s:%s", STDOUT_PIPE_FILE, processRunnerHost, stdoutLocalPort)) + .withCommand("sh", "-c", String.format("cat %s | socat -d - TCP:%s:%s", STDOUT_PIPE_FILE, processRunnerHost, stdoutLocalPort)) .withVolumeMounts(pipeVolumeMount, terminationVolumeMount) .build(); final Container relayStderr = new ContainerBuilder() .withName("relay-stderr") .withImage("alpine/socat:1.7.4.1-r1") - .withCommand("sh", "-c", String.format("cat %s | socat -d -d -d - TCP:%s:%s", STDERR_PIPE_FILE, processRunnerHost, stderrLocalPort)) + .withCommand("sh", "-c", String.format("cat %s | socat -d - TCP:%s:%s", STDERR_PIPE_FILE, processRunnerHost, stderrLocalPort)) .withVolumeMounts(pipeVolumeMount, terminationVolumeMount) .build(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java index 6e58a12d5d746..6583c4becc62f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java @@ -36,8 +36,6 @@ private KubePortManagerSingleton(final Set ports) { /** * Make sure init(ports) is called once prior to repeatedly using getInstance(). - * - * @return */ public static synchronized KubePortManagerSingleton getInstance() { if (instance == null) { @@ -49,8 +47,6 @@ public static synchronized KubePortManagerSingleton getInstance() { /** * Sets up the port range; make sure init(ports) is called once prior to repeatedly using * getInstance(). 
- * - * @return */ public static synchronized void init(final Set ports) { if (instance != null) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java index de4bacfcdc9b2..f05bdb288b505 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java @@ -33,10 +33,9 @@ public class DefaultAirbyteDestination implements AirbyteDestination { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultAirbyteDestination.class); - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("destination") - .setPrefixColor(Color.MAGENTA) - .build(); + .setPrefixColor(Color.MAGENTA); private final IntegrationLauncher integrationLauncher; private final AirbyteStreamFactory streamFactory; @@ -48,7 +47,7 @@ public class DefaultAirbyteDestination implements AirbyteDestination { private Iterator messageIterator = null; public DefaultAirbyteDestination(final IntegrationLauncher integrationLauncher) { - this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC)); + this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER)); } @@ -70,7 +69,7 @@ public void start(final WorkerDestinationConfig destinationConfig, final Path jo WorkerConstants.DESTINATION_CATALOG_JSON_FILENAME, Jsons.serialize(destinationConfig.getCatalog())); // stdout logs are logged elsewhere since stdout also contains data - LineGobbler.gobble(destinationProcess.getErrorStream(), LOGGER::error, "airbyte-destination", CONTAINER_LOG_MDC); + LineGobbler.gobble(destinationProcess.getErrorStream(), LOGGER::error, "airbyte-destination", CONTAINER_LOG_MDC_BUILDER); writer = new BufferedWriter(new OutputStreamWriter(destinationProcess.getOutputStream(), Charsets.UTF_8)); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java index fadf68e44ef03..d24c567f1528a 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java @@ -38,10 +38,9 @@ public class DefaultAirbyteSource implements AirbyteSource { private static final Duration GRACEFUL_SHUTDOWN_DURATION = Duration.of(10, ChronoUnit.HOURS); private static final Duration FORCED_SHUTDOWN_DURATION = Duration.of(1, ChronoUnit.MINUTES); - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("source") - .setPrefixColor(Color.BLUE) - .build(); + .setPrefixColor(Color.BLUE); private final IntegrationLauncher integrationLauncher; private final AirbyteStreamFactory streamFactory; @@ -51,7 +50,7 @@ public class DefaultAirbyteSource implements AirbyteSource { private Iterator messageIterator = null; public DefaultAirbyteSource(final IntegrationLauncher integrationLauncher) { - this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC), new HeartbeatMonitor(HEARTBEAT_FRESH_DURATION)); + this(integrationLauncher, new 
DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER), new HeartbeatMonitor(HEARTBEAT_FRESH_DURATION)); } @VisibleForTesting @@ -75,7 +74,7 @@ public void start(final WorkerSourceConfig sourceConfig, final Path jobRoot) thr sourceConfig.getState() == null ? null : WorkerConstants.INPUT_STATE_JSON_FILENAME, sourceConfig.getState() == null ? null : Jsons.serialize(sourceConfig.getState().getState())); // stdout logs are logged elsewhere since stdout also contains data - LineGobbler.gobble(sourceProcess.getErrorStream(), LOGGER::error, "airbyte-source", CONTAINER_LOG_MDC); + LineGobbler.gobble(sourceProcess.getErrorStream(), LOGGER::error, "airbyte-source", CONTAINER_LOG_MDC_BUILDER); messageIterator = streamFactory.create(IOs.newBufferedReader(sourceProcess.getInputStream())) .peek(message -> heartbeatMonitor.beat()) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java index e0c8c89e9da5b..4fb66377b9516 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java @@ -7,7 +7,6 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.MdcScope; -import io.airbyte.commons.logging.MdcScope.Builder; import io.airbyte.protocol.models.AirbyteLogMessage; import io.airbyte.protocol.models.AirbyteMessage; import java.io.BufferedReader; @@ -30,22 +29,22 @@ public class DefaultAirbyteStreamFactory implements AirbyteStreamFactory { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultAirbyteStreamFactory.class); - private final MdcScope containerLogMDC; + private final MdcScope.Builder containerLogMdcBuilder; private final AirbyteProtocolPredicate protocolValidator; private final Logger logger; public DefaultAirbyteStreamFactory() { - this(new Builder().build()); + this(MdcScope.DEFAULT_BUILDER); } - public DefaultAirbyteStreamFactory(final MdcScope containerLogMDC) { - this(new AirbyteProtocolPredicate(), LOGGER, containerLogMDC); + public DefaultAirbyteStreamFactory(final MdcScope.Builder containerLogMdcBuilder) { + this(new AirbyteProtocolPredicate(), LOGGER, containerLogMdcBuilder); } - DefaultAirbyteStreamFactory(final AirbyteProtocolPredicate protocolPredicate, final Logger logger, final MdcScope containerLogMDC) { + DefaultAirbyteStreamFactory(final AirbyteProtocolPredicate protocolPredicate, final Logger logger, final MdcScope.Builder containerLogMdcBuilder) { protocolValidator = protocolPredicate; this.logger = logger; - this.containerLogMDC = containerLogMDC; + this.containerLogMdcBuilder = containerLogMdcBuilder; } @Override @@ -58,7 +57,7 @@ public Stream create(final BufferedReader bufferedReader) { // we log as info all the lines that are not valid json // some sources actually log their process on stdout, we // want to make sure this info is available in the logs. 
- try (containerLogMDC) { + try (final var mdcScope = containerLogMdcBuilder.build()) { logger.info(line); } } @@ -83,7 +82,7 @@ public Stream create(final BufferedReader bufferedReader) { .filter(airbyteMessage -> { final boolean isLog = airbyteMessage.getType() == AirbyteMessage.Type.LOG; if (isLog) { - try (containerLogMDC) { + try (final var mdcScope = containerLogMdcBuilder.build()) { internalLog(airbyteMessage.getLog()); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java index 93645a0abb522..090cc0c107f8d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java @@ -4,7 +4,6 @@ package io.airbyte.workers.temporal; -import io.airbyte.workers.WorkerException; import io.temporal.activity.Activity; import io.temporal.activity.ActivityExecutionContext; import io.temporal.client.ActivityCompletionException; @@ -35,7 +34,6 @@ public TemporalCancellationHandler() { * * @param onCancellationCallback a runnable that will only run when Temporal indicates the activity * should be killed (cancellation or timeout). - * @throws WorkerException */ @Override public void checkAndHandleCancellation(final Runnable onCancellationCallback) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java index c2ccdc20dd310..4f27c1d35e1a4 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java @@ -21,6 +21,7 @@ import io.airbyte.config.StandardSyncOutput; import io.airbyte.config.StandardSyncSummary; import io.airbyte.config.State; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.scheduler.models.IntegrationLauncherConfig; @@ -156,20 +157,49 @@ class ReplicationActivityImpl implements ReplicationActivity { private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; - - public ReplicationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot) { - this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator()); + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; + + public ReplicationActivityImpl( + final ProcessFactory processFactory, + final SecretsHydrator secretsHydrator, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, new AirbyteConfigValidator(), databaseUser, + databasePassword, databaseUrl, airbyteVersion); } @VisibleForTesting ReplicationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, - final AirbyteConfigValidator validator) { + 
final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final AirbyteConfigValidator validator, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = validator; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -191,11 +221,11 @@ public StandardSyncOutput replicate(final JobRunConfig jobRunConfig, }; final TemporalAttemptExecution temporalAttempt = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(sourceLauncherConfig, destinationLauncherConfig, jobRunConfig, syncInput), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); final ReplicationOutput attemptOutput = temporalAttempt.get(); final StandardSyncOutput standardSyncOutput = reduceReplicationOutput(attemptOutput); @@ -280,12 +310,23 @@ class NormalizationActivityImpl implements NormalizationActivity { private final Path workspaceRoot; private final AirbyteConfigValidator validator; private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; public NormalizationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, - final WorkerEnvironment workerEnvironment) { - this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment); + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfig, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment, logConfig, databaseUser, databasePassword, + databaseUrl, airbyteVersion); } @VisibleForTesting @@ -293,12 +334,22 @@ public NormalizationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final AirbyteConfigValidator validator, - final WorkerEnvironment workerEnvironment) { + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = validator; this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -315,11 +366,11 @@ public Void normalize(final JobRunConfig jobRunConfig, }; final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(destinationLauncherConfig, jobRunConfig), 
inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } @@ -356,23 +407,47 @@ class DbtTransformationActivityImpl implements DbtTransformationActivity { private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public DbtTransformationActivityImpl( - final ProcessFactory processFactory, + public DbtTransformationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, - final Path workspaceRoot) { - this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator()); + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion); } @VisibleForTesting DbtTransformationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, - final AirbyteConfigValidator validator) { + final AirbyteConfigValidator validator, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = validator; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -390,11 +465,11 @@ public Void run(final JobRunConfig jobRunConfig, }; final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(destinationLauncherConfig, jobRunConfig, resourceRequirements), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java index d3ec246d936cb..c3021e8960181 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java @@ -6,9 +6,9 @@ import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.functional.CheckedSupplier; -import io.airbyte.config.Configs; -import io.airbyte.config.EnvConfigs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import 
io.airbyte.config.helpers.LogConfigs; import io.airbyte.db.Database; import io.airbyte.db.instance.jobs.JobsDatabaseInstance; import io.airbyte.scheduler.models.JobRunConfig; @@ -43,47 +43,69 @@ public class TemporalAttemptExecution implements Supplier private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10); private final JobRunConfig jobRunConfig; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; private final Path jobRoot; private final CheckedSupplier, Exception> workerSupplier; private final Supplier inputSupplier; private final Consumer mdcSetter; private final CancellationHandler cancellationHandler; private final Supplier workflowIdProvider; - private final Configs configs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; public TemporalAttemptExecution(final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerSupplier, final Supplier inputSupplier, - final CancellationHandler cancellationHandler) { + final CancellationHandler cancellationHandler, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, workerSupplier, inputSupplier, - LogClientSingleton::setJobMdc, - cancellationHandler, - () -> Activity.getExecutionContext().getInfo().getWorkflowId(), - new EnvConfigs()); + (path -> LogClientSingleton.getInstance().setJobMdc(workerEnvironment, logConfigs, path)), + cancellationHandler, databaseUser, databasePassword, databaseUrl, + () -> Activity.getExecutionContext().getInfo().getWorkflowId(), airbyteVersion); } @VisibleForTesting TemporalAttemptExecution(final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerSupplier, final Supplier inputSupplier, final Consumer mdcSetter, final CancellationHandler cancellationHandler, + final String databaseUser, + final String databasePassword, + final String databaseUrl, final Supplier workflowIdProvider, - final Configs configs) { + final String airbyteVersion) { this.jobRunConfig = jobRunConfig; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.jobRoot = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); this.workerSupplier = workerSupplier; this.inputSupplier = inputSupplier; this.mdcSetter = mdcSetter; this.cancellationHandler = cancellationHandler; this.workflowIdProvider = workflowIdProvider; - this.configs = configs; + + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -91,10 +113,10 @@ public OUTPUT get() { try { mdcSetter.accept(jobRoot); - LOGGER.info("Executing worker wrapper. Airbyte version: {}", new EnvConfigs().getAirbyteVersionOrWarning()); + LOGGER.info("Executing worker wrapper. Airbyte version: {}", airbyteVersion); // TODO(Davin): This will eventually run into scaling problems, since it opens a DB connection per // workflow. See https://github.com/airbytehq/airbyte/issues/5936. 
- saveWorkflowIdForCancellation(); + saveWorkflowIdForCancellation(databaseUser, databasePassword, databaseUrl); final Worker worker = workerSupplier.get(); final CompletableFuture outputFuture = new CompletableFuture<>(); @@ -120,16 +142,16 @@ public OUTPUT get() { } } - private void saveWorkflowIdForCancellation() throws IOException { + private void saveWorkflowIdForCancellation(final String databaseUser, final String databasePassword, final String databaseUrl) throws IOException { // If the jobId is not a number, it means the job is a synchronous job. No attempt is created for // it, and it cannot be cancelled, so do not save the workflowId. See // SynchronousSchedulerClient.java // for info. if (NumberUtils.isCreatable(jobRunConfig.getJobId())) { final Database jobDatabase = new JobsDatabaseInstance( - configs.getDatabaseUser(), - configs.getDatabasePassword(), - configs.getDatabaseUrl()) + databaseUser, + databasePassword, + databaseUrl) .getInitialized(); final JobPersistence jobPersistence = new DefaultJobPersistence(jobDatabase); final String workflowId = workflowIdProvider.get(); @@ -156,10 +178,10 @@ private Thread getWorkerThread(final Worker worker, final Complet * requests are routed to the Temporal Scheduler via the cancelJob function in * SchedulerHandler.java. This manifests as a {@link io.temporal.client.ActivityCompletionException} * when the {@link CancellationHandler} heartbeats to the Temporal Scheduler. - * + *

* The callback defined in this function is executed after the above exception is caught, and * defines the clean up operations executed as part of cancel. - * + *

* See {@link CancellationHandler} for more info. */ private Runnable getCancellationChecker(final Worker worker, diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java index 438b2faad77bd..7b1730e9e22b4 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java @@ -6,8 +6,10 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.functional.CheckedSupplier; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.StandardCheckConnectionInput; import io.airbyte.config.StandardCheckConnectionOutput; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; @@ -27,11 +29,31 @@ public class CheckConnectionActivityImpl implements CheckConnectionActivity { private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public CheckConnectionActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot) { + public CheckConnectionActivityImpl(final ProcessFactory processFactory, + final SecretsHydrator secretsHydrator, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } public StandardCheckConnectionOutput run(final JobRunConfig jobRunConfig, @@ -47,11 +69,11 @@ public StandardCheckConnectionOutput run(final JobRunConfig jobRunConfig, final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(launcherConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java index 9b972a2f3426d..d65d14b7fbb40 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java @@ -6,7 +6,9 @@ import 
com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.functional.CheckedSupplier; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.StandardDiscoverCatalogInput; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.scheduler.models.IntegrationLauncherConfig; @@ -29,11 +31,32 @@ public class DiscoverCatalogActivityImpl implements DiscoverCatalogActivity { private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public DiscoverCatalogActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot) { + public DiscoverCatalogActivityImpl(final ProcessFactory processFactory, + final SecretsHydrator secretsHydrator, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; + } public AirbyteCatalog run(final JobRunConfig jobRunConfig, @@ -49,10 +72,12 @@ public AirbyteCatalog run(final JobRunConfig jobRunConfig, final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( workspaceRoot, + workerEnvironment, + logConfigs, jobRunConfig, getWorkerFactory(launcherConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java index 2cae77228900d..159a15bb1bac7 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java @@ -5,7 +5,9 @@ package io.airbyte.workers.temporal.spec; import io.airbyte.commons.functional.CheckedSupplier; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobGetSpecConfig; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; @@ -24,10 +26,29 @@ public class SpecActivityImpl implements SpecActivity { private final ProcessFactory processFactory; private final Path workspaceRoot; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public SpecActivityImpl(final 
ProcessFactory processFactory, final Path workspaceRoot) { + public SpecActivityImpl(final ProcessFactory processFactory, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.workspaceRoot = workspaceRoot; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } public ConnectorSpecification run(final JobRunConfig jobRunConfig, final IntegrationLauncherConfig launcherConfig) { @@ -35,15 +56,18 @@ public ConnectorSpecification run(final JobRunConfig jobRunConfig, final Integra final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( workspaceRoot, + workerEnvironment, + logConfigs, jobRunConfig, getWorkerFactory(launcherConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } - private CheckedSupplier, Exception> getWorkerFactory(final IntegrationLauncherConfig launcherConfig) { + private CheckedSupplier, Exception> getWorkerFactory( + final IntegrationLauncherConfig launcherConfig) { return () -> { final IntegrationLauncher integrationLauncher = new AirbyteIntegrationLauncher( launcherConfig.getJobId(), diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java index 23d1a358cd65b..9fdba8f1833f9 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java @@ -22,6 +22,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; import io.airbyte.config.ConfigSchema; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.ReplicationAttemptSummary; import io.airbyte.config.ReplicationOutput; import io.airbyte.config.StandardSync; @@ -31,6 +32,7 @@ import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.validation.json.JsonSchemaValidator; import io.airbyte.workers.protocols.airbyte.AirbyteDestination; @@ -137,7 +139,7 @@ void testLoggingInThreads() throws IOException, WorkerException { // set up the mdc so that actually log to a file, so that we can verify that file logging captures // threads. 
final Path jobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(jobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, jobRoot); final ReplicationWorker worker = new DefaultReplicationWorker( JOB_ID, diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java index 554f91ad90a80..5bfc29630d14a 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java @@ -17,7 +17,9 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.workers.WorkerConstants; import io.airbyte.workers.WorkerException; @@ -45,7 +47,7 @@ class DefaultNormalizationRunnerTest { static { try { logJobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } catch (final IOException e) { e.printStackTrace(); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java index 52d0b958fa7e5..4e4af48ae9c50 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java @@ -18,8 +18,10 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.workers.TestConfigHelpers; import io.airbyte.workers.WorkerConstants; @@ -60,7 +62,7 @@ class DefaultAirbyteDestinationTest { static { try { logJobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } catch (final IOException e) { e.printStackTrace(); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java index 412f4ec3c3963..3d4bd7ae13fc6 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java @@ -20,9 +20,11 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.config.Configs.WorkerEnvironment; import 
io.airbyte.config.State; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -76,7 +78,7 @@ class DefaultAirbyteSourceTest { static { try { logJobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } catch (final IOException e) { e.printStackTrace(); } @@ -110,7 +112,7 @@ public void setup() throws IOException, WorkerException { streamFactory = noop -> MESSAGES.stream(); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } @AfterEach diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java index 596f9e0f11f53..d990d719778b1 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java @@ -121,7 +121,7 @@ public void testMissingNewLineBetweenValidRecords() { private Stream stringToMessageStream(final String inputString) { final InputStream inputStream = new ByteArrayInputStream(inputString.getBytes()); final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); - return new DefaultAirbyteStreamFactory(protocolPredicate, logger, new Builder().build()).create(bufferedReader); + return new DefaultAirbyteStreamFactory(protocolPredicate, logger, new Builder()).create(bufferedReader); } } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java index de7e5827a2e8d..6a6c5e2019828 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java @@ -73,12 +73,15 @@ void setup() throws IOException { attemptExecution = new TemporalAttemptExecution<>( workspaceRoot, + configs.getWorkerEnvironment(), configs.getLogConfigs(), JOB_RUN_CONFIG, execution, () -> "", mdcSetter, mock(CancellationHandler.class), - () -> "workflow_id", - configs); + SOURCE_USERNAME, + SOURCE_PASSWORD, + container.getJdbcUrl(), + () -> "workflow_id", configs.getAirbyteVersionOrWarning()); } @AfterAll diff --git a/build.gradle b/build.gradle index 271b27c23feb4..e45384dd0a55a 100644 --- a/build.gradle +++ b/build.gradle @@ -73,7 +73,8 @@ def createSpotlessTarget = { pattern -> 'normalization_test_output', 'tools', 'secrets', - 'charts' // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. + 'charts', // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. 
+ 'resources/seed/*_specs.yaml' ] if (System.getenv().containsKey("SUB_BUILD")) { @@ -111,6 +112,26 @@ spotless { } check.dependsOn 'spotlessApply' +@SuppressWarnings('GroovyAssignabilityCheck') +def Task getDockerBuildTask(String artifactName, String projectDir) { + return task ("buildDockerImage-$artifactName" (type: DockerBuildImage) { + def buildTag = System.getenv('VERSION') ?: 'dev' + def buildPlatform = System.getenv('DOCKER_BUILD_PLATFORM') ?: 'linux/amd64' + def alpineImage = System.getenv('ALPINE_IMAGE') ?: 'alpine:3.4' + def postgresImage = System.getenv('POSTGRES_IMAGE') ?: 'postgres:13-alpine' + def jdkVersion = System.getenv('JDK_VERSION') ?: '14.0.2' + def buildArch = System.getenv('DOCKER_BUILD_ARCH') ?: 'amd64' + + inputDir = file("$projectDir/build/docker") + platform = buildPlatform + images.add("airbyte/$artifactName:$buildTag") + buildArgs.put('JDK_VERSION', jdkVersion) + buildArgs.put('DOCKER_BUILD_ARCH', buildArch) + buildArgs.put('ALPINE_IMAGE', alpineImage) + buildArgs.put('POSTGRES_IMAGE', postgresImage) + }) +} + allprojects { apply plugin: 'base' diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index f41f0b93bd520..4eaad31fb81c9 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.25-alpha" +appVersion: "0.30.34-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 7e94fbf812a48..fe00d1305495e 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. 
| `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index fe01794f85b92..674a5d1c3a699 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.34-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.34-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.34-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.34-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docker-compose.build-m1.yaml b/docker-compose.build-m1.yaml index 71e8dc4f6d5e1..2dc62df2c91cb 100644 --- a/docker-compose.build-m1.yaml +++ b/docker-compose.build-m1.yaml @@ -41,7 +41,7 @@ services: build: dockerfile: Dockerfile args: - ARCH: ${DOCKER_BUILD_ARCH} + DOCKER_BUILD_ARCH: ${DOCKER_BUILD_ARCH} JDK_VERSION: ${JDK_VERSION} context: airbyte-workers labels: diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 8309718c31801..f3f371e432011 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -112,6 +112,7 @@ * [Salesforce](integrations/sources/salesforce.md) * [SalesLoft](integrations/sources/salesloft.md) * [Sendgrid](integrations/sources/sendgrid.md) + * [Sentry](integrations/sources/sentry.md) * [Shopify](integrations/sources/shopify.md) * [Shortio](integrations/sources/shortio.md) * [Slack](integrations/sources/slack.md) @@ -158,9 +159,11 @@ * [MySQL](integrations/destinations/mysql.md) * [Oracle DB](integrations/destinations/oracle.md) * [Postgres](integrations/destinations/postgres.md) + * [Pulsar](integrations/destinations/pulsar.md) * [Redshift](integrations/destinations/redshift.md) * 
[S3](integrations/destinations/s3.md) * [Snowflake](integrations/destinations/snowflake.md) + * [Cassandra](integrations/destinations/cassandra.md) * [Custom or New Connector](integrations/custom-connectors.md) * [Connector Development](connector-development/README.md) * [Tutorials](connector-development/tutorials/README.md) @@ -219,6 +222,7 @@ * [Technical Stack](understanding-airbyte/tech-stack.md) * [Change Data Capture \(CDC\)](understanding-airbyte/cdc.md) * [Namespaces](understanding-airbyte/namespaces.md) + * [Json to Avro Conversion](understanding-airbyte/json-avro-conversion.md) * [Glossary of Terms](understanding-airbyte/glossary.md) * [API documentation](api-documentation.md) * [Project Overview](project-overview/README.md) diff --git a/docs/connector-development/README.md b/docs/connector-development/README.md index fe8ce35eb402c..ebf9705e57843 100644 --- a/docs/connector-development/README.md +++ b/docs/connector-development/README.md @@ -107,14 +107,8 @@ The steps for updating an existing connector are the same as for building a new Once you've finished iterating on the changes to a connector as specified in its `README.md`, follow these instructions to ship the new version of the connector with Airbyte out of the box. 1. Bump the version in the `Dockerfile` of the connector \(`LABEL io.airbyte.version=X.X.X`\). -2. Update the connector definition in the Airbyte connector index to use the new version: - * `airbyte-config/init/src/main/resources/seed/source_definitions.yaml` if it is a source - * `airbyte-config/init/src/main/resources/seed/destination_definitions.yaml` if it is a destination. -3. Update the connector JSON definition. To find the appropriate JSON file to update, find a JSON file `.json` where the UUID portion is the ID specified in the YAML file you modified in step 2. The relevant directories are: - * `airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/.json` for sources - * `airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/.json` for destinations -4. Submit a PR containing the changes you made. -5. One of Airbyte maintainers will review the change and publish the new version of the connector to Docker hub. Triggering tests and publishing connectors can be done by leaving a comment on the PR with the following format \(the PR must be from the Airbyte repo, not a fork\): +2. Submit a PR containing the changes you made. +3. One of Airbyte maintainers will review the change and publish the new version of the connector to Docker hub. Triggering tests and publishing connectors can be done by leaving a comment on the PR with the following format \(the PR must be from the Airbyte repo, not a fork\): ```text # to run integration tests for the connector @@ -125,8 +119,13 @@ Once you've finished iterating on the changes to a connector as specified in its # Example: /publish connector=connectors/source-hubspot /publish connector=(connectors|bases)/ ``` - -6. The new version of the connector is now available for everyone who uses it. Thank you! +4. Update the connector definition in the Airbyte connector index to use the new version: + * `airbyte-config/init/src/main/resources/seed/source_definitions.yaml` if it is a source + * `airbyte-config/init/src/main/resources/seed/destination_definitions.yaml` if it is a destination. + + Then rebuild the platform to generate the seed spec yaml files, and commit the changes to the PR. 
See [this readme](https://github.com/airbytehq/airbyte/tree/a534bb2a8f29b20e3cc7c52fef1bc3c34783695d/airbyte-config/specs) for more information. + +5. The new version of the connector is now available for everyone who uses it. Thank you! ## Using credentials in CI diff --git a/docs/contributing-to-airbyte/developing-locally.md b/docs/contributing-to-airbyte/developing-locally.md index bfaa11d6283c2..ab7e4329e75ac 100644 --- a/docs/contributing-to-airbyte/developing-locally.md +++ b/docs/contributing-to-airbyte/developing-locally.md @@ -40,6 +40,8 @@ some additional environment variables: ```bash export DOCKER_BUILD_PLATFORM=linux/arm64 export DOCKER_BUILD_ARCH=arm64 +export ALPINE_IMAGE=arm64v8/alpine:3.14 +export POSTGRES_IMAGE=arm64v8/postgres:13-alpine export JDK_VERSION=17 export NODE_VERSION=16.11.1 SUB_BUILD=PLATFORM ./gradlew build diff --git a/docs/deploying-airbyte/on-kubernetes.md b/docs/deploying-airbyte/on-kubernetes.md index eece6c1f7ad41..48b3ee793d262 100644 --- a/docs/deploying-airbyte/on-kubernetes.md +++ b/docs/deploying-airbyte/on-kubernetes.md @@ -10,8 +10,8 @@ Airbyte allows scaling sync workloads horizontally using Kubernetes. The core co For local testing we recommend following one of the following setup guides: -* [Docker Desktop \(Mac\)](https://docs.docker.com/desktop/kubernetes/) -* [Minikube](https://minikube.sigs.k8s.io/docs/start/) +* [Docker Desktop \(Mac\)](https://docs.docker.com/desktop/kubernetes) +* [Minikube](https://minikube.sigs.k8s.io/docs/start) * NOTE: Start Minikube with at least 4gb RAM with `minikube start --memory=4000` * [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 4b6086e77f16d..51a7d00939997 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -96,6 +96,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [Salesloft](./sources/salesloft.md)| Alpha | | [SAP Business One](sources/sap-business-one.md) | Beta | | [Sendgrid](sources/sendgrid.md) | Certified | +| [Sentry](sources/sentry.md) | Alpha | | [Shopify](sources/shopify.md) | Certified | | [Short.io](sources/shortio.md) | Beta | | [Slack](sources/slack.md) | Beta | @@ -142,8 +143,10 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [MySQL](destinations/mysql.md) | Beta | | [Oracle](destinations/oracle.md) | Alpha | | [Postgres](destinations/postgres.md) | Certified | +| [Pulsar](destinations/pulsar.md) | Alpha | | [Redshift](destinations/redshift.md) | Certified | | [S3](destinations/s3.md) | Certified | | [SQL Server \(MSSQL\)](destinations/mssql.md) | Alpha | | [Snowflake](destinations/snowflake.md) | Certified | +| [Cassandra](destinations/cassandra.md) | Alpha | diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index 3b691446003a4..ede3b62ab2336 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -169,6 +169,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.8 | 2021-10-27 | [\#7413](https://github.com/airbytehq/airbyte/issues/7413) | Fixed DATETIME conversion for BigQuery | | 0.1.7 | 2021-10-26 | [\#7240](https://github.com/airbytehq/airbyte/issues/7240) | Output partitioned/clustered tables | | 0.1.6 | 2021-09-16 | [\#6145](https://github.com/airbytehq/airbyte/pull/6145) | BigQuery 
Denormalized support for date, datetime & timestamp types through the json "format" key | | 0.1.5 | 2021-09-07 | [\#5881](https://github.com/airbytehq/airbyte/pull/5881) | BigQuery Denormalized NPE fix | diff --git a/docs/integrations/destinations/cassandra.md b/docs/integrations/destinations/cassandra.md new file mode 100644 index 0000000000000..2280daf9da0ab --- /dev/null +++ b/docs/integrations/destinations/cassandra.md @@ -0,0 +1,49 @@ +# Cassandra + +## Sync overview + +### Output schema + +The incoming airbyte data is structured in keyspaces and tables and is partitioned and replicated across different nodes +in the cluster. This connector maps an incoming `stream` to a Cassandra `table` and a `namespace` to a +Cassandra `keyspace`. Fields in the airbyte message become different columns in the Cassandra tables. Each table will +contain the following columns. + +* `_airbyte_ab_id`: A randomly generated UUID used as the partition key. +* `_airbyte_emitted_at`: a timestamp representing when the event was received from the data source. +* `_airbyte_data`: a json text representing the extracted data. + +### Features + +| Feature | Support | Notes | +| :--- | :---: | :--- | +| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured Cassandra table. | +| Incremental - Append Sync | ✅ | | +| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. | +| Namespaces | ✅ | Namespace will be used as part of the table name. | + + + +### Performance considerations + +Cassandra is designed to handle large amounts of data by using different nodes in the cluster in order to perform write +operations. As long as you have enough nodes in the cluster the database can scale infinitely and handle any amount of +data from the connector. + +## Getting started + +### Requirements + +* The driver is compatible with _Cassandra >= 2.1_ +* Configuration + * Keyspace [default keyspace to use when writing data] + * Username [authentication username] + * Password [authentication password] + * Address [cluster address] + * Port [default: 9042] + * Datacenter [optional] [default: datacenter1] + * Replication [optional] [default: 1] + +### Setup guide + +######TODO: more info, screenshots?, etc... diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index b10eb2254db42..e14fd99fe7fde 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -89,7 +89,7 @@ Each table will have the following columns: | `_airbyte_emitted_at` | timestamp | Data emission timestamp. | | Data fields from the source stream | various | All fields in the staging Parquet files will be expanded in the table. | -Learn how source data is converted to Parquet and the current limitations [here](https://docs.airbyte.io/integrations/destinations/s3#data-schema). +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion).
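+
+As a quick illustration \(a hypothetical stream with made-up field names and illustrative values; the full conversion rules live in the page linked above\), a source record such as:
+
+```json
+{
+  "id": 1,
+  "name": "Alice"
+}
+```
+
+would land in the staging Parquet files as an Avro record roughly of this shape, with the Airbyte bookkeeping fields added during conversion:
+
+```json
+{
+  "_airbyte_ab_id": "<uuid assigned by Airbyte>",
+  "_airbyte_emitted_at": 1635724800000,
+  "id": 1,
+  "name": "Alice",
+  "_airbyte_additional_properties": null
+}
+```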
## Getting started @@ -103,6 +103,6 @@ Learn how source data is converted to Parquet and the current limitations [here] | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | | 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. | - diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index d0f4806f40c24..8feba67751575 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -98,157 +98,7 @@ Here is the available compression codecs: #### Data schema -Under the hood, an Airbyte data stream in Json schema is converted to an Avro schema, and then the Json object is converted to an Avro record based on the Avro schema. Because the data stream can come from any data source, the Avro S3 destination connector has the following arbitrary rules. - -1. Json schema types are mapped to Avro typea as follows: - -| Json Data Type | Avro Data Type | -| :---: | :---: | -| string | string | -| number | double | -| integer | int | -| boolean | boolean | -| null | null | -| object | record | -| array | array | - -1. Built-in Json schema formats are not mapped to Avro logical types at this moment. -2. Combined restrictions \("allOf", "anyOf", and "oneOf"\) will be converted to type unions. The corresponding Avro schema can be less stringent. For example, the following Json schema - - ```javascript - { - "oneOf": [ - { "type": "string" }, - { "type": "integer" } - ] - } - ``` - - will become this in Avro schema: - - ```javascript - { - "type": ["null", "string", "int"] - } - ``` - -3. Keyword `not` is not supported, as there is no equivalent validation mechanism in Avro schema. -4. Only alphanumeric characters and underscores \(`/a-zA-Z0-9_/`\) are allowed in a stream or field name. Any special character will be converted to an alphabet or underscore. For example, `spécial:character_names` will become `special_character_names`. The original names will be stored in the `doc` property in this format: `_airbyte_original_name:`. -5. All field will be nullable. For example, a `string` Json field will be typed as `["null", "string"]` in Avro. This is necessary because the incoming data stream may have optional fields. -6. For array fields in Json schema, when the `items` property is an array, it means that each element in the array should follow its own schema sequentially. For example, the following specification means the first item in the array should be a string, and the second a number. - - ```javascript - { - "array_field": { - "type": "array", - "items": [ - { "type": "string" }, - { "type": "number" } - ] - } - } - ``` - -This is not supported in Avro schema. As a compromise, the converter creates a union, \["string", "number"\], which is less stringent: - -```javascript - { - "name": "array_field", - "type": [ - "null", - { - "type": "array", - "items": ["null", "string"] - } - ], - "default": null - } -``` - -1. 
Two Airbyte specific fields will be added to each Avro record: - -| Field | Schema | Document | -| :--- | :--- | :---: | -| `_airbyte_ab_id` | `uuid` | [link](http://avro.apache.org/docs/current/spec.html#UUID) | -| `_airbyte_emitted_at` | `timestamp-millis` | [link](http://avro.apache.org/docs/current/spec.html#Timestamp+%28millisecond+precision%29) | - -1. Currently `additionalProperties` is not supported. This means if the source is schemaless \(e.g. Mongo\), or has flexible fields, they will be ignored. We will have a solution soon. Feel free to submit a new issue if this is blocking for you. - -For example, given the following Json schema: - -```javascript -{ - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "id": { - "type": "integer" - }, - "user": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "field_with_spécial_character": { - "type": "integer" - } - } - }, - "created_at": { - "type": ["null", "string"], - "format": "date-time" - } - } -} -``` - -Its corresponding Avro schema will be: - -```javascript -{ - "name" : "stream_name", - "type" : "record", - "fields" : [ { - "name" : "_airbyte_ab_id", - "type" : { - "type" : "string", - "logicalType" : "uuid" - } - }, { - "name" : "_airbyte_emitted_at", - "type" : { - "type" : "long", - "logicalType" : "timestamp-millis" - } - }, { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "user", - "type" : [ "null", { - "type" : "record", - "name" : "user", - "fields" : [ { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "field_with_special_character", - "type" : [ "null", "int" ], - "doc" : "_airbyte_original_name:field_with_spécial_character", - "default" : null - } ] - } ], - "default" : null - }, { - "name" : "created_at", - "type" : [ "null", "string" ], - "default" : null - } ] -} -``` +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ### CSV @@ -263,7 +113,7 @@ Like most of the other Airbyte destination connectors, usually the output has th For example, given the following json object from a source: -```javascript +```json { "user_id": 123, "name": { @@ -289,7 +139,7 @@ With root level normalization, the output CSV is: [Json Lines](https://jsonlines.org/) is a text format with one JSON per line. Each line has a structure as follows: -```javascript +```json { "_airbyte_ab_id": "", "_airbyte_emitted_at": "", @@ -299,7 +149,7 @@ With root level normalization, the output CSV is: For example, given the following two json objects from a source: -```javascript +```json [ { "user_id": 123, @@ -344,7 +194,7 @@ These parameters are related to the `ParquetOutputFormat`. See the [Java doc](ht #### Data schema -Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. See the `Data schema` section from the [Avro output](gcs.md#avro) for rules and limitations. 
+Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ## Getting started @@ -372,7 +222,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.2 | 2021-09-12 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by GCS | | 0.1.1 | 2021-08-26 | [\#5296](https://github.com/airbytehq/airbyte/issues/5296) | Added storing gcsCsvFileLocation property for CSV format. This is used by destination-bigquery \(GCS Staging upload type\) | | 0.1.0 | 2021-07-16 | [\#4329](https://github.com/airbytehq/airbyte/pull/4784) | Initial release. | - diff --git a/docs/integrations/destinations/pulsar.md b/docs/integrations/destinations/pulsar.md new file mode 100644 index 0000000000000..c6279745c7ecd --- /dev/null +++ b/docs/integrations/destinations/pulsar.md @@ -0,0 +1,87 @@ +# Pulsar + +## Overview + +The Airbyte Pulsar destination allows you to sync data to Pulsar. Each stream is written to the corresponding Pulsar topic. + +### Sync overview + +#### Output schema + +Each stream will be output into a Pulsar topic. + +Currently, this connector only writes data in JSON format. More formats \(e.g. Apache Avro\) will be supported in the future. + +Each record will contain in its key the uuid assigned by Airbyte, and in the value the following fields: + +* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. +* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +* `_airbyte_data`: a json blob with the event data, encoded in base64. +* `_airbyte_stream`: the name of each record's stream. + +#### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | :--- | :--- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | +| Namespaces | Yes | | + +## Getting started + +### Requirements + +To use the Pulsar destination, you'll need: + +* A Pulsar cluster 2.8 or above. + +### Setup guide + +#### Network Access + +Make sure your Pulsar brokers can be accessed by Airbyte. + +#### **Permissions** + +Airbyte should be allowed to write messages into topics, and these topics should be created before writing into Pulsar or, at least, enable the configuration in the brokers `allowAutoTopicCreation` \(which is not recommended for production environments\). + +Note that if you choose to use dynamic topic names, you will probably need to enable `allowAutoTopicCreation` to avoid your connection failing if there was an update to the source connector's schema. Otherwise, a hardcoded topic name may be best.
+ +Also, notice that the messages will be sent to topics based on the configured Pulsar `topic_tenant` and `topic_namespace` configs with their `topic_type`. + +#### Target topics + +You can determine the topics to which messages are written via the `topic_pattern` configuration parameter in its corresponding Pulsar `topic_tenant`-`topic_namespace`. Messages can be written to either a hardcoded, pre-defined topic, or dynamically written to different topics based on the [namespace](https://docs.airbyte.io/understanding-airbyte/namespaces) or stream they came from. + +To write all messages to a single hardcoded topic, enter its name in the `topic_pattern` field e.g: setting `topic_pattern` to `my-topic-name` will write all messages from all streams and namespaces to that topic. + +To define the output topics dynamically, you can leverage the `{namespace}` and `{stream}` pattern variables, which cause messages to be written to different topics based on the values present when producing the records. For example, setting the `topic_pattern` parameter to `airbyte_syncs/{namespace}/{stream}` means that messages from namespace `n1` and stream `s1` will get written to the topic `airbyte_syncs/n1/s1`, and messages from `s2` to `airbyte_syncs/n1/s2` etc. + +If you define output topic dynamically, you might want to enable `allowAutoTopicCreation` to avoid your connection failing if there was an update to the source connector's schema. Otherwise, you'll need to manually create topics in Pulsar as they are added/updated in the source, which is the recommended option for production environments. + +**NOTICE**: a naming convention transformation will be applied to the target topic name using the `StandardNameTransformer` so that some special characters will be replaced. + +### Setup the Pulsar destination in Airbyte + +You should now have all the requirements needed to configure Pulsar as a destination in the UI. You can configure the following parameters on the Pulsar destination \(though many of these are optional or have default values\): + +* **Pulsar brokers** +* **Use TLS** +* **Topic type** +* **Topic tenant** +* **Topic namespace** +* **Topic pattern** +* **Test topic** +* **Producer name** +* **Sync producer** +* **Compression type** +* **Message send timeout** +* **Max pending messages** +* **Max pending messages across partitions** +* **Enable batching** +* **Batching max messages** +* **Batching max publish delay** +* **Block if queue is full** + +More info about this can be found in the [Pulsar producer configs documentation site](https://pulsar.apache.org/docs/en/client-libraries-java/#producer). diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index ab811c48bf0c8..d85654362606c 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -97,158 +97,7 @@ Here is the available compression codecs: #### Data schema -Under the hood, an Airbyte data stream in Json schema is converted to an Avro schema, and then the Json object is converted to an Avro record based on the Avro schema. Because the data stream can come from any data source, the Avro S3 destination connector has the following arbitrary rules. - -1. Json schema types are mapped to Avro types as follows: - - | Json Data Type | Avro Data Type | - | :---: | :---: | - | string | string | - | number | double | - | integer | int | - | boolean | boolean | - | null | null | - | object | record | - | array | array | - -2. 
Built-in Json schema formats are not mapped to Avro logical types at this moment. -3. Combined restrictions \("allOf", "anyOf", and "oneOf"\) will be converted to type unions. The corresponding Avro schema can be less stringent. For example, the following Json schema - - ```javascript - { - "oneOf": [ - { "type": "string" }, - { "type": "integer" } - ] - } - ``` - - will become this in Avro schema: - - ```javascript - { - "type": ["null", "string", "int"] - } - ``` - -4. Keyword `not` is not supported, as there is no equivalent validation mechanism in Avro schema. -5. Only alphanumeric characters and underscores \(`/a-zA-Z0-9_/`\) are allowed in a stream or field name. Any special character will be converted to an alphabet or underscore. For example, `spécial:character_names` will become `special_character_names`. The original names will be stored in the `doc` property in this format: `_airbyte_original_name:`. -6. The field name cannot start with a number, so an underscore will be added to the field name at the beginning. -7. All field will be nullable. For example, a `string` Json field will be typed as `["null", "string"]` in Avro. This is necessary because the incoming data stream may have optional fields. -8. For array fields in Json schema, when the `items` property is an array, it means that each element in the array should follow its own schema sequentially. For example, the following specification means the first item in the array should be a string, and the second a number. - - ```javascript - { - "array_field": { - "type": "array", - "items": [ - { "type": "string" }, - { "type": "number" } - ] - } - } - ``` - - This is not supported in Avro schema. As a compromise, the converter creates a union, \["string", "number"\], which is less stringent: - - ```javascript - { - "name": "array_field", - "type": [ - "null", - { - "type": "array", - "items": ["null", "string"] - } - ], - "default": null - } - ``` - -9. Two Airbyte specific fields will be added to each Avro record: - - | Field | Schema | Document | - | :--- | :--- | :---: | - | `_airbyte_ab_id` | `uuid` | [link](http://avro.apache.org/docs/current/spec.html#UUID) | - | `_airbyte_emitted_at` | `timestamp-millis` | [link](http://avro.apache.org/docs/current/spec.html#Timestamp+%28millisecond+precision%29) | - -10. Currently `additionalProperties` is not supported. This means if the source is schemaless \(e.g. Mongo\), or has flexible fields, they will be ignored. We will have a solution soon. Feel free to submit a new issue if this is blocking for you. 
- -For example, given the following Json schema: - -```javascript -{ - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "id": { - "type": "integer" - }, - "user": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "field_with_spécial_character": { - "type": "integer" - } - } - }, - "created_at": { - "type": ["null", "string"], - "format": "date-time" - } - } -} -``` - -Its corresponding Avro schema will be: - -```javascript -{ - "name" : "stream_name", - "type" : "record", - "fields" : [ { - "name" : "_airbyte_ab_id", - "type" : { - "type" : "string", - "logicalType" : "uuid" - } - }, { - "name" : "_airbyte_emitted_at", - "type" : { - "type" : "long", - "logicalType" : "timestamp-millis" - } - }, { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "user", - "type" : [ "null", { - "type" : "record", - "name" : "user", - "fields" : [ { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "field_with_special_character", - "type" : [ "null", "int" ], - "doc" : "_airbyte_original_name:field_with_spécial_character", - "default" : null - } ] - } ], - "default" : null - }, { - "name" : "created_at", - "type" : [ "null", "string" ], - "default" : null - } ] -} -``` +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ### CSV @@ -263,7 +112,7 @@ Like most of the other Airbyte destination connectors, usually the output has th For example, given the following json object from a source: -```javascript +```json { "user_id": 123, "name": { @@ -289,7 +138,7 @@ With root level normalization, the output CSV is: [Json Lines](https://jsonlines.org/) is a text format with one JSON per line. Each line has a structure as follows: -```javascript +```json { "_airbyte_ab_id": "", "_airbyte_emitted_at": "", @@ -299,7 +148,7 @@ With root level normalization, the output CSV is: For example, given the following two json objects from a source: -```javascript +```json [ { "user_id": 123, @@ -344,7 +193,7 @@ These parameters are related to the `ParquetOutputFormat`. See the [Java doc](ht #### Data schema -Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. See the `Data schema` section from the [Avro output](s3.md#avro) for rules and limitations. +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). 
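+
+For instance \(a minimal sketch using a hypothetical stream named `users` with a single `email` field\), a source Json schema like:
+
+```json
+{
+  "type": "object",
+  "properties": {
+    "email": { "type": "string" }
+  }
+}
+```
+
+would be converted to an Avro schema along these lines before the records are written out as Parquet, following the rules described in the linked page \(every field becomes nullable, and the Airbyte bookkeeping fields are appended\):
+
+```json
+{
+  "type": "record",
+  "name": "users",
+  "fields": [
+    { "name": "_airbyte_ab_id", "type": { "type": "string", "logicalType": "uuid" } },
+    { "name": "_airbyte_emitted_at", "type": { "type": "long", "logicalType": "timestamp-millis" } },
+    { "name": "email", "type": ["null", "string"], "default": null },
+    { "name": "_airbyte_additional_properties", "type": ["null", { "type": "map", "values": "string" }], "default": null }
+  ]
+}
+```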
## Getting Started \(Airbyte Open-Source / Airbyte Cloud\) @@ -375,6 +224,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | | 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | | 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | diff --git a/docs/integrations/sources/amplitude.md b/docs/integrations/sources/amplitude.md index 8ec31cf76db2f..ab694e5c6aadf 100644 --- a/docs/integrations/sources/amplitude.md +++ b/docs/integrations/sources/amplitude.md @@ -38,7 +38,7 @@ The Amplitude connector should gracefully handle Amplitude API limitations under * Amplitude Secret Key ### Setup guide - + Please read [How to get your API key and Secret key](https://help.amplitude.com/hc/en-us/articles/360058073772-Create-and-manage-organizations-and-projects#view-and-edit-your-project-information). ## Changelog diff --git a/docs/integrations/sources/facebook-marketing.md b/docs/integrations/sources/facebook-marketing.md index 82cfa1780422f..1a6ef22e57872 100644 --- a/docs/integrations/sources/facebook-marketing.md +++ b/docs/integrations/sources/facebook-marketing.md @@ -96,6 +96,7 @@ As a summary, custom insights allows to replicate only some fields, resulting in | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.22 | 2021-11-05 | [4864](https://github.com/airbytehq/airbyte/pull/7605) | Add job retry logics to AdsInsights stream | | 0.2.21 | 2021-10-05 | [4864](https://github.com/airbytehq/airbyte/pull/4864) | Update insights streams with custom entries for fields, breakdowns and action_breakdowns | | 0.2.20 | 2021-10-04 | [6719](https://github.com/airbytehq/airbyte/pull/6719) | Update version of facebook\_bussiness package to 12.0 | | 0.2.19 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | diff --git a/docs/integrations/sources/facebook-pages.md b/docs/integrations/sources/facebook-pages.md index 4a494e6d826b3..4336e4895255d 100644 --- a/docs/integrations/sources/facebook-pages.md +++ b/docs/integrations/sources/facebook-pages.md @@ -83,6 +83,7 @@ You can easily get the page id from the page url. 
For example, if you have a pag | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-10-28 | [7440](https://github.com/airbytehq/airbyte/pull/7440) | Generate Page token from config access token | | 0.1.2 | 2021-10-18 | [7128](https://github.com/airbytehq/airbyte/pull/7128) | Upgrade Facebook API to v.12 | | 0.1.1 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | | 0.1.0 | 2021-09-01 | [5158](https://github.com/airbytehq/airbyte/pull/5158) | Initial Release | diff --git a/docs/integrations/sources/google-directory.md b/docs/integrations/sources/google-directory.md index b82e6f06f2f34..e80608000e364 100644 --- a/docs/integrations/sources/google-directory.md +++ b/docs/integrations/sources/google-directory.md @@ -35,9 +35,19 @@ This Source is capable of syncing the following Streams: This connector attempts to back off gracefully when it hits Directory API's rate limits. To find more information about limits, see [Google Directory's Limits and Quotas](https://developers.google.com/admin-sdk/directory/v1/limits) documentation. -## Getting started +## Getting Started \(Airbyte Cloud\) -### Requirements +1. Click `OAuth2.0 authorization` then `Authenticate your Google Directory account`. +2. You're done. + +## Getting Started \(Airbyte Open-Source\) + +Google APIs use the OAuth 2.0 protocol for authentication and authorization. This connector supports [Web server application](https://developers.google.com/identity/protocols/oauth2#webserver) and [Service accounts](https://developers.google.com/identity/protocols/oauth2#serviceaccount) scenarios. Therefore, there are 2 options of setting up authorization for this source: + +* Use your Google account and authorize over Google's OAuth on connection setup. Select "Default OAuth2.0 authorization" from dropdown list. +* Create service account specifically for Airbyte. + +### Service account requirements * Credentials to a Google Service Account with delegated Domain Wide Authority * Email address of the workspace admin which created the Service Account @@ -58,5 +68,8 @@ You should now be ready to use the Google Directory connector in Airbyte. | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| 0.1.8 | 2021-11-02 | [7409](https://github.com/airbytehq/airbyte/pull/7409) | Support oauth (update publish) | +| 0.1.7 | 2021-11-02 | [7409](https://github.com/airbytehq/airbyte/pull/7409) | Support oauth | +| 0.1.6 | 2021-11-02 | [7464](https://github.com/airbytehq/airbyte/pull/7464) | Migrate to the CDK | | 0.1.5 | 2021-10-20 | [6930](https://github.com/airbytehq/airbyte/pull/6930) | Fix crash when a group don't have members | | 0.1.4 | 2021-10-19 | [7167](https://github.com/airbytehq/airbyte/pull/7167) | Add organizations and phones to `users` schema | diff --git a/docs/integrations/sources/greenhouse.md b/docs/integrations/sources/greenhouse.md index a8b57e1459c70..cdd9dd4e7a826 100644 --- a/docs/integrations/sources/greenhouse.md +++ b/docs/integrations/sources/greenhouse.md @@ -57,5 +57,6 @@ Please follow the [Greenhouse documentation for generating an API key](https://d | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.6 | 2021-11-08 | [7607](https://github.com/airbytehq/airbyte/pull/7607) | Implement demographics streams support. 
Update SAT for demographics streams | | 0.2.5 | 2021-09-22 | [6377](https://github.com/airbytehq/airbyte/pull/6377) | Refactor the connector to use CDK. Implement additional stream support | | 0.2.4 | 2021-09-15 | [6238](https://github.com/airbytehq/airbyte/pull/6238) | added identification of accessible streams for API keys with limited permissions | diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 2836f6b08ed00..f7caa93fa2e57 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -96,6 +96,8 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix oAuth flow schema| +| 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | | 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | | 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length | | 0.1.19 | 2021-10-26 | [6954](https://github.com/airbytehq/airbyte/pull/6954) | Fix issue with getting `414` HTTP error for streams | diff --git a/docs/integrations/sources/iterable.md b/docs/integrations/sources/iterable.md index b949e92cc722f..9b9830a8af0b9 100644 --- a/docs/integrations/sources/iterable.md +++ b/docs/integrations/sources/iterable.md @@ -51,13 +51,15 @@ The Iterable connector should not run into Iterable API limitations under normal * Iterable API Key ### Setup guide - + Please read [How to find your API key](https://support.iterable.com/hc/en-us/articles/360043464871-API-Keys-#creating-api-keys). ## CHANGELOG | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| `0.1.11` | 2021-11-03 | [7619](https://github.com/airbytehq/airbyte/pull/7619) | Bugfix type error while incrementally loading the `Templates` stream | +| `0.1.10` | 2021-11-03 | [7591](https://github.com/airbytehq/airbyte/pull/7591) | Optimize export streams memory consumption for large requests | | `0.1.9` | 2021-10-06 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Enable campaign_metrics stream | | `0.1.8` | 2021-09-20 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Add new streams: campaign_metrics, events | | `0.1.7` | 2021-09-20 | [6242](https://github.com/airbytehq/airbyte/pull/6242) | Updated schema for: campaigns, lists, templates, metadata | diff --git a/docs/integrations/sources/klaviyo.md b/docs/integrations/sources/klaviyo.md index f2a293e714ae1..04dffda082f58 100644 --- a/docs/integrations/sources/klaviyo.md +++ b/docs/integrations/sources/klaviyo.md @@ -44,6 +44,7 @@ The Klaviyo connector should not run into Klaviyo API limitations under normal u ### Setup guide + Please follow these [steps](https://help.klaviyo.com/hc/en-us/articles/115005062267-How-to-Manage-Your-Account-s-API-Keys#your-private-api-keys3) to obtain Private API Key for your account. 
## CHANGELOG diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index 4c3e3dc1ce984..4249393b686d6 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -47,15 +47,18 @@ The Mixpanel connector should not run into Mixpanel API limitations under normal * Project region `US` or `EU` ### Setup guide - + Please read [Find API Secret](https://help.mixpanel.com/hc/en-us/articles/115004502806-Find-Project-Token-). + Select the correct region \(EU or US\) for your Mixpanel project. See detail [here](https://help.mixpanel.com/hc/en-us/articles/360039135652-Data-Residency-in-EU) ## CHANGELOG | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.3` | 2021-10-30 | [7505](https://github.com/airbytehq/airbyte/issues/7505) | Guarantee that standard and custom mixpanel properties in the `Engage` stream are written as strings | +| `0.1.2` | 2021-11-02 | [7439](https://github.com/airbytehq/airbyte/issues/7439) | Added delay for all streams to match API limitation of requests rate | | `0.1.1` | 2021-09-16 | [6075](https://github.com/airbytehq/airbyte/issues/6075) | Added option to select project region | | `0.1.0` | 2021-07-06 | [3698](https://github.com/airbytehq/airbyte/issues/3698) | created CDK native mixpanel connector | diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index d15213490f0e1..35e3621541df3 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -294,6 +294,7 @@ If you do not see a type in this list, assume that it is coerced into a string. | Version | Date | Pull Request | Subject | | | :--- | :--- | :--- | :--- | :--- | +| 0.3.8 | 2021-10-26 | [7386](https://github.com/airbytehq/airbyte/pull/7386) | Fixed data type (smalldatetime, smallmoney) conversion from mssql source | | | 0.3.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | | | 0.3.6 | 2021-09-17 | [6318](https://github.com/airbytehq/airbyte/pull/6318) | Added option to connect to DB via SSH | | | 0.3.4 | 2021-08-13 | [4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | | diff --git a/docs/integrations/sources/mysql.md b/docs/integrations/sources/mysql.md index 3f34993f79545..d502cd5f6de3b 100644 --- a/docs/integrations/sources/mysql.md +++ b/docs/integrations/sources/mysql.md @@ -180,6 +180,7 @@ If you do not see a type in this list, assume that it is coerced into a string. | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.4.9 | 2021-11-02 | [7559](https://github.com/airbytehq/airbyte/pull/7559) | Correctly process large unsigned short integer values which may fall outside java's `Short` data type capability| | 0.4.8 | 2021-09-16 | [6093](https://github.com/airbytehq/airbyte/pull/6093) | Improve reliability of processing various data types like decimals, dates, datetime, binary, and text | | 0.4.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | | 0.4.6 | 2021-09-29 | [6510](https://github.com/airbytehq/airbyte/pull/6510) | Support SSL connection | diff --git a/docs/integrations/sources/okta.md b/docs/integrations/sources/okta.md index c459e0b77159a..99e07056785b6 100644 --- a/docs/integrations/sources/okta.md +++ b/docs/integrations/sources/okta.md @@ -59,6 +59,7 @@ Different Okta APIs require different admin privilege levels. 
API tokens inherit | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.4 | 2021-11-02 | [7584](https://github.com/airbytehq/airbyte/pull/7584) | Fix incremental params for log stream | | 0.1.3 | 2021-09-08 | [5905](https://github.com/airbytehq/airbyte/pull/5905) | Fix incremental stream defect | | 0.1.2 | 2021-07-01 | [4456](https://github.com/airbytehq/airbyte/pull/4456) | Bugfix infinite pagination in logs stream | | 0.1.1 | 2021-06-09 | [3937](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` env variable for kubernetes support | diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index ec56709bf8c19..c0733190dad75 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -235,7 +235,7 @@ Postgres data types are mapped to the following data types when synchronizing da | `lseg` | string | | | `macaddr` | string | | | `macaddr8` | string | | -| `money` | string | | +| `money` | string | When running logical replication (CDC), `money` values larger than 999999999999999 (15 nines) or smaller than -999999999999999 (15 nines) are transmitted as null; | | `mood` | string | | | `numeric` | number | | | `path` | string | | @@ -263,6 +263,7 @@ Postgres data types are mapped to the following data types when synchronizing da | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.3.13 | 2021-10-26 | [7339](https://github.com/airbytehq/airbyte/pull/7339) | Support or improve support for Interval, Money, Date, various geometric data types, inventory_items, and others | | 0.3.12 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | | 0.3.11 | 2021-09-02 | [5742](https://github.com/airbytehq/airbyte/pull/5742) | Add SSH Tunnel support | | 0.3.9 | 2021-08-17 | [5304](https://github.com/airbytehq/airbyte/pull/5304) | Fix CDC OOM issue | diff --git a/docs/integrations/sources/recharge.md b/docs/integrations/sources/recharge.md index f2eeafb1b8448..0445abcbdae62 100644 --- a/docs/integrations/sources/recharge.md +++ b/docs/integrations/sources/recharge.md @@ -50,6 +50,7 @@ Please read [How to generate your API token](https://support.rechargepayments.co | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.4 | 2021-11-05 | [7626](https://github.com/airbytehq/airbyte/pull/7626) | Improve 'backoff' for HTTP requests | | 0.1.3 | 2021-09-17 | [6149](https://github.com/airbytehq/airbyte/pull/6149) | Update `discount` and `order` schema | | 0.1.2 | 2021-09-17 | [6149](https://github.com/airbytehq/airbyte/pull/6149) | Change `cursor_field` for Incremental streams | | | | | | diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 64b27d5c5bf9d..afe7cc996f951 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -734,6 +734,7 @@ List of available streams: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-11-06 | [7592](https://github.com/airbytehq/airbyte/pull/7592) | Fix getting `anyType` fields using BULK API | | 0.1.2 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | | 0.1.1 | 2021-09-21 | [6209](https://github.com/airbytehq/airbyte/pull/6209) | Fix bug with pagination for BULK API | | 0.1.0 | 2021-09-08 | 
[5619](https://github.com/airbytehq/airbyte/pull/5619) | Salesforce Aitbyte-Native Connector | diff --git a/docs/integrations/sources/sentry.md b/docs/integrations/sources/sentry.md new file mode 100644 index 0000000000000..d0e07ac29af21 --- /dev/null +++ b/docs/integrations/sources/sentry.md @@ -0,0 +1,49 @@ +# Sentry + +## Sync overview + +This source can sync data for the [Sentry API](https://docs.sentry.io/api/). It supports only Full Refresh syncs. + +### Output schema + +This Source is capable of syncing the following Streams: + +* [Events](https://docs.sentry.io/api/events/list-a-projects-events/) +* [Issues](https://docs.sentry.io/api/events/list-a-projects-issues/) + +### Data type mapping + +| Integration Type | Airbyte Type | Notes | +| :--- | :--- | :--- | +| `string` | `string` | | +| `integer`, `number` | `number` | | +| `array` | `array` | | +| `object` | `object` | | + +### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | :--- | :--- | +| Full Refresh Sync | Yes | | +| Incremental Sync | No | | +| SSL connection | Yes | | +| Namespaces | No | | + +## Getting started + +### Requirements + +* `auth_token` - Sentry Authentication Token with the necessary permissions \(described below\) +* `organization` - Organization Slug. You can check it at https://sentry.io/settings// +* `project` - The name of the Project you want to sync. You can list it from https://sentry.io/settings//projects/ +* `hostname` - Host name of Sentry API server. For self-hosted, specify your host name here. Otherwise, leave it empty. \(default: sentry.io\) + +### Setup guide + +You can find or create authentication tokens within [Sentry](https://sentry.io/settings/account/api/auth-tokens/). + +## Changelog + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2021-10-12 | [6975](https://github.com/airbytehq/airbyte/pull/6975) | New Source: Sentry | diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index db1b2d51560ad..4a28983a70800 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -37,6 +37,8 @@ This Source is capable of syncing the following core Streams: * [Price Rules](https://help.shopify.com/en/api/reference/discounts/pricerule) * [Locations](https://shopify.dev/api/admin-rest/2021-10/resources/location) * [InventoryLevels](https://shopify.dev/api/admin-rest/2021-10/resources/inventorylevel) +* [Fulfillment Orders](https://shopify.dev/api/admin-rest/2021-07/resources/fulfillmentorder) +* [Fulfillments](https://shopify.dev/api/admin-rest/2021-07/resources/fulfillment) + #### NOTE: @@ -97,6 +99,7 @@ This connector support both: `OAuth 2.0` and `API PASSWORD` (for private applica | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.22 | 2021-10-18 | [7107](https://github.com/airbytehq/airbyte/pull/7107) | Added FulfillmentOrders, Fulfillments streams | | 0.1.21 | 2021-10-14 | [7382](https://github.com/airbytehq/airbyte/pull/7382) | Fixed `InventoryLevels` primary key | | 0.1.20 | 2021-10-14 | [7063](https://github.com/airbytehq/airbyte/pull/7063) | Added `Location` and `InventoryLevels` as streams | | 0.1.19 | 2021-10-11 | [6951](https://github.com/airbytehq/airbyte/pull/6951) | Added support of `OAuth 2.0` authorisation option | diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md index c5411102e1c8e..df253799f28d8 100644 --- a/docs/integrations/sources/stripe.md +++ b/docs/integrations/sources/stripe.md @@ -11,6
+11,8 @@ This Source is capable of syncing the following core Streams: * [Balance Transactions](https://stripe.com/docs/api/balance_transactions/list) \(Incremental\) * [Bank accounts](https://stripe.com/docs/api/customer_bank_accounts/list) * [Charges](https://stripe.com/docs/api/charges/list) \(Incremental\) +* [Checkout Streams](https://stripe.com/docs/api/checkout/sessions/list) +* [Checkout Streams Line Items](https://stripe.com/docs/api/checkout/sessions/line_items) * [Coupons](https://stripe.com/docs/api/coupons/list) \(Incremental\) * [Customer Balance Transactions](https://stripe.com/docs/api/customer_balance_transactions/list) * [Customers](https://stripe.com/docs/api/customers/list) \(Incremental\) @@ -21,6 +23,7 @@ This Source is capable of syncing the following core Streams: * [Invoices](https://stripe.com/docs/api/invoices/list) \(Incremental\) * [PaymentIntents](https://stripe.com/docs/api/payment_intents/list) \(Incremental\) * [Payouts](https://stripe.com/docs/api/payouts/list) \(Incremental\) +* [Promotion Code](https://stripe.com/docs/api/promotion_codes/list) \(Incremental\) * [Plans](https://stripe.com/docs/api/plans/list) \(Incremental\) * [Products](https://stripe.com/docs/api/products/list) \(Incremental\) * [Refunds](https://stripe.com/docs/api/refunds/list) \(Incremental\) @@ -71,6 +74,7 @@ If you would like to test Airbyte using test data on Stripe, `sk_test_` and `rk_ | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.22 | 2021-11-05 | [7345](https://github.com/airbytehq/airbyte/pull/7345) | Add 3 new streams | | 0.1.21 | 2021-10-07 | [6841](https://github.com/airbytehq/airbyte/pull/6841) | Fix missing `start_date` argument + update json files for SAT | | 0.1.20 | 2021-09-30 | [6017](https://github.com/airbytehq/airbyte/pull/6017) | Add lookback\_window\_days parameter | | 0.1.19 | 2021-09-27 | [6466](https://github.com/airbytehq/airbyte/pull/6466) | Use `start_date` parameter in incremental streams | diff --git a/docs/integrations/sources/trello.md b/docs/integrations/sources/trello.md index 1be02dc448440..cda4717fd1c19 100644 --- a/docs/integrations/sources/trello.md +++ b/docs/integrations/sources/trello.md @@ -42,7 +42,7 @@ The Trello connector should not run into Trello API limitations under normal usa * Trello API Key ### Setup guide - + Please read [How to get your APIs Token and Key](https://developer.atlassian.com/cloud/trello/guides/rest-api/authorization/#using-basic-oauth) or you can log in to Trello and visit [Developer API Keys](https://trello.com/app-key/). 
## Changelog diff --git a/docs/integrations/sources/zendesk-support.md b/docs/integrations/sources/zendesk-support.md index 598e77acb1321..69bcfba6c75c2 100644 --- a/docs/integrations/sources/zendesk-support.md +++ b/docs/integrations/sources/zendesk-support.md @@ -97,6 +97,7 @@ We recommend creating a restricted, read-only key specifically for Airbyte acces | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| `0.1.4` | 2021-10-26 | [7377](https://github.com/airbytehq/airbyte/pull/7377) | fix initially_assigned_at type in ticket metrics | | `0.1.3` | 2021-10-17 | [7097](https://github.com/airbytehq/airbyte/pull/7097) | correction of spec file | | `0.1.2` | 2021-10-16 | [6513](https://github.com/airbytehq/airbyte/pull/6513) | fixed comments stream | | `0.1.1` | 2021-09-02 | [5787](https://github.com/airbytehq/airbyte/pull/5787) | fixed incremental logic for the ticket_comments stream | diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 1891484d5b426..d25e0889c915a 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. ```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.25-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.34-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/docs/understanding-airbyte/json-avro-conversion.md b/docs/understanding-airbyte/json-avro-conversion.md new file mode 100644 index 0000000000000..7b8887d7cfac5 --- /dev/null +++ b/docs/understanding-airbyte/json-avro-conversion.md @@ -0,0 +1,231 @@ +# Json to Avro Conversion for Blob Storage Destinations + +When an Airbyte data stream is synced to the Avro or Parquet format (e.g. Parquet on S3), the source Json schema is converted to an Avro schema, then the Json object is converted to an Avro record based on the Avro schema (and further to Parquet if necessary). Because the data stream can come from any data source, the Json to Avro conversion process has the following rules and limitations. + +1. Json schema types are mapped to Avro types as follows: + + | Json Data Type | Avro Data Type | + | :---: | :---: | + | string | string | + | number | double | + | integer | int | + | boolean | boolean | + | null | null | + | object | record | + | array | array | + +2. Built-in Json schema formats are not mapped to Avro logical types at this moment. +3. Combined restrictions \("allOf", "anyOf", and "oneOf"\) will be converted to type unions. The corresponding Avro schema can be less stringent. For example, the following Json schema + + ```json + { + "oneOf": [ + { "type": "string" }, + { "type": "integer" } + ] + } + ``` + + will become this in Avro schema: + + ```json + { + "type": ["null", "string", "int"] + } + ``` + +4. Keyword `not` is not supported, as there is no equivalent validation mechanism in Avro schema. +5. Only alphanumeric characters and underscores \(`/a-zA-Z0-9_/`\) are allowed in a stream or field name. Any special character will be converted to an alphabet or underscore. For example, `spécial:character_names` will become `special_character_names`. 
The original names will be stored in the `doc` property in this format: `_airbyte_original_name:`. +6. The field name cannot start with a number, so an underscore will be added to the field name at the beginning. +7. All fields will be nullable. For example, a `string` Json field will be typed as `["null", "string"]` in Avro. This is necessary because the incoming data stream may have optional fields. +8. For array fields in Json schema, when the `items` property is an array, it means that each element in the array should follow its own schema sequentially. For example, the following specification means the first item in the array should be a string, and the second a number. + + ```json + { + "array_field": { + "type": "array", + "items": [ + { "type": "string" }, + { "type": "number" } + ] + } + } + ``` + + This is not supported in Avro schema. As a compromise, the converter creates a union, \["string", "number"\], which is less stringent: + + ```json + { + "name": "array_field", + "type": [ + "null", + { + "type": "array", + "items": ["null", "string"] + } + ], + "default": null + } + ``` + +9. Three Airbyte specific fields will be added to each Avro record: + + | Field | Schema | Document | + | :--- | :--- | :---: | + | `_airbyte_ab_id` | `uuid` | [link](http://avro.apache.org/docs/current/spec.html#UUID) | + | `_airbyte_emitted_at` | `timestamp-millis` | [link](http://avro.apache.org/docs/current/spec.html#Timestamp+%28millisecond+precision%29) | + | `_airbyte_additional_properties` | `map` of `string` | See explanation below. | + +10. A Json object can have additional properties of unknown types, which is not compatible with the Avro schema. To solve this problem during Json to Avro object conversion, we introduce a special field: `_airbyte_additional_properties` typed as a nullable `map` from `string` to `string`: + +```json +{ + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null +} +``` + +For example, given the following Json schema: + +```json +{ + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "username": { + "type": ["null", "string"] + } + } +} +``` + +this Json object + +```json +{ + "username": "admin", + "active": true, + "age": 21, + "auth": { + "auth_type": "ssl", + "api_key": "abcdefg/012345", + "admin": false, + "id": 1000 + } +} +``` + +will be converted to the following Avro object: + +```json +{ + "username": "admin", + "_airbyte_additional_properties": { + "active": "true", + "age": "21", + "auth": "{\"auth_type\":\"ssl\",\"api_key\":\"abcdefg/012345\",\"admin\":false,\"id\":1000}" + } +} +``` + +Note that all fields other than `username` are moved under `_airbyte_additional_properties` as serialized strings, including the original object `auth`. + +11. Based on the above rules, here is an overall example.
Given the following Json schema: + +```json +{ + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "id": { + "type": "integer" + }, + "user": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "field_with_spécial_character": { + "type": "integer" + } + } + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + } + } +} +``` + +Its corresponding Avro schema will be: + +```json +{ + "name": "stream_name", + "type": "record", + "fields": [ + { + "name": "_airbyte_ab_id", + "type": { + "type": "string", + "logicalType": "uuid" + } + }, + { + "name": "_airbyte_emitted_at", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "id", + "type": ["null", "int"], + "default": null + }, + { + "name": "user", + "type": [ + "null", + { + "type": "record", + "name": "user", + "fields": [ + { + "name": "id", + "type": ["null", "int"], + "default": null + }, + { + "name": "field_with_special_character", + "type": ["null", "int"], + "doc": "_airbyte_original_name:field_with_spécial_character", + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "created_at", + "type": ["null", "string"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] +} + +``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index e404a0ed13369..e7c0e4a4be7a2 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.25-alpha +AIRBYTE_VERSION=0.30.34-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 103773a260009..415e36f44ef1b 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/scheduler - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/server - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/webapp - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/worker - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index e404a0ed13369..e7c0e4a4be7a2 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.25-alpha +AIRBYTE_VERSION=0.30.34-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 56bcecd4507d0..0df9c15d4d9c7 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/scheduler - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/server - 
newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/webapp - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: airbyte/worker - newTag: 0.30.25-alpha + newTag: 0.30.34-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/settings.gradle b/settings.gradle index 4d5b7d79afa0a..a263a988dde73 100644 --- a/settings.gradle +++ b/settings.gradle @@ -11,15 +11,21 @@ gradleEnterprise { } } +sourceControl { + gitRepository("https://github.com/airbytehq/json-avro-converter.git") { + producesModule("tech.allegro.schema.json2avro:converter") + } +} + rootProject.name = 'airbyte' // SUB_BUILD is an enum of , PLATFORM, CONNECTORS_BASE. Blank is equivalent to all. -if(!System.getenv().containsKey("SUB_BUILD")) { +if (!System.getenv().containsKey("SUB_BUILD")) { println("Building all of Airbyte.") } else { def subBuild = System.getenv().get("SUB_BUILD") println("Building Airbyte Sub Build: " + subBuild) - if(subBuild != "PLATFORM" && subBuild != "CONNECTORS_BASE") { + if (subBuild != "PLATFORM" && subBuild != "CONNECTORS_BASE") { throw new IllegalArgumentException(String.format("%s is invalid. Must be unset or PLATFORM or CONNECTORS_BASE", subBuild)) } } @@ -40,7 +46,6 @@ include ':airbyte-test-utils' // airbyte-workers has a lot of dependencies. include ':airbyte-workers' // reused by acceptance tests in connector base. include ':airbyte-analytics' // transitively used by airbyte-workers. -include ':airbyte-config:init' // transitively used by airbyte-workers. include ':airbyte-config:persistence' // transitively used by airbyte-workers. include ':airbyte-db:jooq' // transitively used by airbyte-workers. include ':airbyte-notification' // transitively used by airbyte-workers. @@ -48,7 +53,7 @@ include ':airbyte-scheduler:models' // transitively used by airbyte-workers. include ':airbyte-scheduler:persistence' // used by airbyte-workers. 
// platform -if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") { +if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") { include ':airbyte-cli' include ':airbyte-e2e-testing' include ':airbyte-migration' @@ -58,10 +63,12 @@ if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") include ':airbyte-server' include ':airbyte-tests' include ':airbyte-webapp' + include ':airbyte-config:init' + include ':airbyte-config:specs' } // connectors base -if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "CONNECTORS_BASE") { +if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "CONNECTORS_BASE") { include ':airbyte-cdk:python' include ':airbyte-integrations:bases:airbyte-protocol' include ':airbyte-integrations:bases:base' @@ -97,7 +104,7 @@ if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") } // connectors -if(!System.getenv().containsKey("SUB_BUILD")) { +if (!System.getenv().containsKey("SUB_BUILD")) { // include all connector projects def integrationsPath = rootDir.toPath().resolve('airbyte-integrations/connectors') println integrationsPath diff --git a/tools/bin/acceptance_test_kube.sh b/tools/bin/acceptance_test_kube.sh index 7dd9d2502f498..7eb6f9a9a4ed9 100755 --- a/tools/bin/acceptance_test_kube.sh +++ b/tools/bin/acceptance_test_kube.sh @@ -14,7 +14,6 @@ kind load docker-image airbyte/scheduler:dev --name chart-testing & kind load docker-image airbyte/webapp:dev --name chart-testing & kind load docker-image airbyte/worker:dev --name chart-testing & kind load docker-image airbyte/db:dev --name chart-testing & -kind load docker-image airbyte/normalization:dev --name chart-testing & wait echo "Starting app..." 
@@ -49,9 +48,20 @@ server_logs () { echo "server logs:" && kubectl logs deployment.apps/airbyte-ser scheduler_logs () { echo "scheduler logs:" && kubectl logs deployment.apps/airbyte-scheduler; } pod_sweeper_logs () { echo "pod sweeper logs:" && kubectl logs deployment.apps/airbyte-pod-sweeper; } worker_logs () { echo "worker logs:" && kubectl logs deployment.apps/airbyte-worker; } +db_logs () { echo "db logs:" && kubectl logs deployment.apps/airbyte-db; } +temporal_logs () { echo "temporal logs:" && kubectl logs deployment.apps/airbyte-temporal; } describe_pods () { echo "describe pods:" && kubectl describe pods; } describe_nodes () { echo "describe nodes:" && kubectl describe nodes; } -print_all_logs () { server_logs; scheduler_logs; worker_logs; pod_sweeper_logs; describe_nodes; describe_pods; } +print_all_logs () { + server_logs; + scheduler_logs; + worker_logs; + db_logs; + temporal_logs; + pod_sweeper_logs; + describe_nodes; + describe_pods; +} trap "echo 'kube logs:' && print_all_logs" EXIT kubectl port-forward svc/airbyte-server-svc 8001:8001 & diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 1a69179826e8b..cd1742f8f25e5 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -64,6 +64,7 @@ write_standard_creds source-braintree "$BRAINTREE_TEST_CREDS" write_standard_creds source-cart "$CART_TEST_CREDS" write_standard_creds source-chargebee "$CHARGEBEE_INTEGRATION_TEST_CREDS" write_standard_creds source-close-com "$SOURCE_CLOSE_COM_CREDS" +write_standard_creds source-confluence "$SOURCE_CONFLUENCE_TEST_CREDS" write_standard_creds source-delighted "$SOURCE_DELIGHTED_TEST_CREDS" write_standard_creds source-drift "$DRIFT_INTEGRATION_TEST_CREDS" write_standard_creds source-dixa "$SOURCE_DIXA_TEST_CREDS" @@ -74,6 +75,7 @@ write_standard_creds source-file "$AZURE_STORAGE_INTEGRATION_TEST_CREDS" "azblob write_standard_creds source-file "$FILE_SECURE_HTTPS_TEST_CREDS" write_standard_creds source-file-secure "$FILE_SECURE_HTTPS_TEST_CREDS" write_standard_creds source-freshdesk "$FRESHDESK_TEST_CREDS" +write_standard_creds source-freshsales "$SOURCE_FRESHSALES_TEST_CREDS" write_standard_creds source-freshservice "$SOURCE_FRESHSERVICE_TEST_CREDS" write_standard_creds source-facebook-marketing "$FACEBOOK_MARKETING_TEST_INTEGRATION_CREDS" write_standard_creds source-facebook-pages "$FACEBOOK_PAGES_INTEGRATION_TEST_CREDS" @@ -84,6 +86,7 @@ write_standard_creds source-google-analytics-v4 "$GOOGLE_ANALYTICS_V4_TEST_CREDS write_standard_creds source-google-analytics-v4 "$GOOGLE_ANALYTICS_V4_TEST_CREDS_SRV_ACC" "service_config.json" write_standard_creds source-google-analytics-v4 "$GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD" "old_config.json" write_standard_creds source-google-directory "$GOOGLE_DIRECTORY_TEST_CREDS" +write_standard_creds source-google-directory "$GOOGLE_DIRECTORY_TEST_CREDS_OAUTH" "config_oauth.json" write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS" write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC" "service_account_config.json" write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS" @@ -108,6 +111,7 @@ write_standard_creds source-mailchimp "$MAILCHIMP_TEST_CREDS" write_standard_creds source-marketo "$SOURCE_MARKETO_TEST_CREDS" write_standard_creds source-microsoft-teams "$MICROSOFT_TEAMS_TEST_CREDS" write_standard_creds source-mixpanel "$MIXPANEL_INTEGRATION_TEST_CREDS" +write_standard_creds source-monday "$SOURCE_MONDAY_TEST_CREDS" 
write_standard_creds source-mongodb-strict-encrypt "$MONGODB_TEST_CREDS" "credentials.json" write_standard_creds source-mongodb-v2 "$MONGODB_TEST_CREDS" "credentials.json" write_standard_creds source-mssql "$MSSQL_RDS_TEST_CREDS" @@ -143,6 +147,7 @@ write_standard_creds source-snowflake "$SNOWFLAKE_INTEGRATION_TEST_CREDS" "confi write_standard_creds source-square "$SOURCE_SQUARE_CREDS" write_standard_creds source-strava "$SOURCE_STRAVA_TEST_CREDS" write_standard_creds source-paystack "$SOURCE_PAYSTACK_TEST_CREDS" +write_standard_creds source-sentry "$SOURCE_SENTRY_TEST_CREDS" write_standard_creds source-stripe "$SOURCE_STRIPE_CREDS" write_standard_creds source-stripe "$STRIPE_INTEGRATION_CONNECTED_ACCOUNT_TEST_CREDS" "connected_account_config.json" write_standard_creds source-surveymonkey "$SURVEYMONKEY_TEST_CREDS"
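Referring back to the JSON-schema-to-Avro-schema example added in the docs change above: the minimal Java sketch below (the class name, sample values, and the abbreviated schema are illustrative only, not part of this change) shows how a record for that stream carries the generated `_airbyte_ab_id`, the `_airbyte_emitted_at` timestamp, and the catch-all `_airbyte_additional_properties` map, using only the core Apache Avro API.

```java
import java.time.Instant;
import java.util.UUID;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class AvroRecordSketch {

  // Abbreviated form of the generated Avro schema shown in the docs example above.
  private static final String SCHEMA_JSON =
      "{ \"name\": \"stream_name\", \"type\": \"record\", \"fields\": ["
      + " { \"name\": \"_airbyte_ab_id\", \"type\": { \"type\": \"string\", \"logicalType\": \"uuid\" } },"
      + " { \"name\": \"_airbyte_emitted_at\", \"type\": { \"type\": \"long\", \"logicalType\": \"timestamp-millis\" } },"
      + " { \"name\": \"id\", \"type\": [\"null\", \"int\"], \"default\": null },"
      + " { \"name\": \"_airbyte_additional_properties\", \"type\": [\"null\", { \"type\": \"map\", \"values\": \"string\" }], \"default\": null }"
      + " ] }";

  public static void main(String[] args) {
    Schema schema = new Schema.Parser().parse(SCHEMA_JSON);

    // Every emitted record gets a random UUID and a millisecond timestamp,
    // matching the _airbyte_ab_id and _airbyte_emitted_at fields of the schema.
    GenericRecord record = new GenericRecordBuilder(schema)
        .set("_airbyte_ab_id", UUID.randomUUID().toString())
        .set("_airbyte_emitted_at", Instant.now().toEpochMilli())
        .set("id", 42)
        .build();

    // _airbyte_additional_properties defaults to null; it is the place where
    // fields not declared in the JSON schema are meant to land, as a string-to-string map.
    System.out.println(record);
  }
}
```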
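The `settings.gradle` change above pulls `https://github.com/airbytehq/json-avro-converter.git` in as a Gradle source dependency that produces the `tech.allegro.schema.json2avro:converter` module, so consuming modules can keep depending on those coordinates while Gradle builds the fork from source. Assuming the fork preserves the upstream converter's public API (`JsonAvroConverter` with `convertToGenericDataRecord` / `convertToAvro`; this sketch has not been verified against the fork), converting a JSON record into Avro looks roughly like this:

```java
import java.nio.charset.StandardCharsets;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
// Provided by the source dependency declared in settings.gradle.
import tech.allegro.schema.json2avro.converter.JsonAvroConverter;

public class JsonToAvroSketch {

  public static void main(String[] args) {
    // Trivial schema for brevity; the generated schema from the docs example works the same way.
    Schema schema = new Schema.Parser().parse(
        "{ \"name\": \"stream_name\", \"type\": \"record\", \"fields\": ["
        + " { \"name\": \"id\", \"type\": [\"null\", \"int\"], \"default\": null } ] }");

    String json = "{ \"id\": 42 }";
    JsonAvroConverter converter = new JsonAvroConverter();

    // In-memory GenericRecord, convenient for writing Avro or Parquet downstream.
    GenericData.Record record =
        converter.convertToGenericDataRecord(json.getBytes(StandardCharsets.UTF_8), schema);

    // Binary Avro encoding of the same record.
    byte[] avroBytes = converter.convertToAvro(json.getBytes(StandardCharsets.UTF_8), schema);

    System.out.println(record + " -> " + avroBytes.length + " bytes");
  }
}
```

Using a source dependency here, rather than a published artifact, presumably lets the platform track the forked converter without a separate publishing step: Gradle checks the repository out and builds the module as part of the build.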