From c689594069dc564e32abd2ae5978bbfab1fa77da Mon Sep 17 00:00:00 2001 From: Koji Matsumoto Date: Tue, 2 Nov 2021 23:11:27 +0900 Subject: [PATCH 01/83] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Sentry=20(?= =?UTF-8?q?#6975)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add Events, Issues, ProjectDetail stream * add P/R number * add SUMMARY entry * add docs/integrations/README.md entry * add source_definitions.yaml entry * add connector JSON definition * add builds.md entry * SentryStream keeps primary_key * add Projects stream * change stream for connection checking * handling errors with try-catch in next_page_token function * remove required key field from schemas * remove DEFAULT_HOST * raise error if link header don't have mandatory field * fix unit test for streams * update cursor for pagination * add docs to each endpoint * add hostname property to invalid_config * fix schema * add hostname to sample_config --- .../cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json | 7 + .../resources/seed/source_definitions.yaml | 10 + airbyte-integrations/builds.md | 1 + .../connectors/source-sentry/.dockerignore | 7 + .../connectors/source-sentry/.gitignore | 1 + .../connectors/source-sentry/Dockerfile | 38 ++ .../connectors/source-sentry/README.md | 132 ++++++ .../source-sentry/acceptance-test-config.yml | 18 + .../source-sentry/acceptance-test-docker.sh | 16 + .../connectors/source-sentry/bootstrap.md | 16 + .../connectors/source-sentry/build.gradle | 14 + .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 14 + .../integration_tests/configured_catalog.json | 40 ++ .../integration_tests/invalid_config.json | 6 + .../integration_tests/sample_config.json | 6 + .../connectors/source-sentry/main.py | 13 + .../connectors/source-sentry/requirements.txt | 2 + .../connectors/source-sentry/setup.py | 29 ++ .../source-sentry/source_sentry/__init__.py | 8 + .../source_sentry/schemas/events.json | 72 ++++ .../source_sentry/schemas/issues.json | 133 ++++++ .../source_sentry/schemas/project_detail.json | 402 ++++++++++++++++++ .../source_sentry/schemas/projects.json | 119 ++++++ .../source-sentry/source_sentry/source.py | 44 ++ .../source-sentry/source_sentry/spec.json | 34 ++ .../source-sentry/source_sentry/streams.py | 158 +++++++ .../source-sentry/unit_tests/__init__.py | 3 + .../source-sentry/unit_tests/test_source.py | 26 ++ .../source-sentry/unit_tests/test_streams.py | 110 +++++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/sources/sentry.md | 49 +++ 33 files changed, 1533 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json create mode 100644 airbyte-integrations/connectors/source-sentry/.dockerignore create mode 100644 airbyte-integrations/connectors/source-sentry/.gitignore create mode 100644 airbyte-integrations/connectors/source-sentry/Dockerfile create mode 100644 airbyte-integrations/connectors/source-sentry/README.md create mode 100644 airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-sentry/bootstrap.md create mode 100644 airbyte-integrations/connectors/source-sentry/build.gradle create mode 100644 airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py create mode 100644 
airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-sentry/main.py create mode 100644 airbyte-integrations/connectors/source-sentry/requirements.txt create mode 100644 airbyte-integrations/connectors/source-sentry/setup.py create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/source.py create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/spec.json create mode 100644 airbyte-integrations/connectors/source-sentry/source_sentry/streams.py create mode 100644 airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py create mode 100644 airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py create mode 100644 docs/integrations/sources/sentry.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json new file mode 100644 index 0000000000000..f88e4eedda821 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/cdaf146a-9b75-49fd-9dd2-9d64a0bb4781.json @@ -0,0 +1,7 @@ +{ + "sourceDefinitionId": "cdaf146a-9b75-49fd-9dd2-9d64a0bb4781", + "name": "Sentry", + "dockerRepository": "airbyte/source-sentry", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/sentry" +} diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 3e91d0e60c8bb..47ffb0f36f3d6 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -596,6 +596,16 @@ dockerImageTag: 0.1.2 documentationUrl: https://docs.airbyte.io/integrations/sources/zendesk-talk sourceType: api +- sourceDefinitionId: cdaf146a-9b75-49fd-9dd2-9d64a0bb4781 + name: Sentry + dockerRepository: airbyte/source-sentry + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/sentry +- sourceDefinitionId: bb6afd81-87d5-47e3-97c4-e2c2901b1cf8 + name: OneSignal + dockerRepository: airbyte/source-onesignal + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/lever-onesignal - name: Zoom sourceDefinitionId: aea2fd0d-377d-465e-86c0-4fdc4f688e51 dockerRepository: airbyte/source-zoom-singer diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 70ce39f56dc4e..00a87e0827684 100644 
--- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -72,6 +72,7 @@ | Salesforce | [![source-salesforce](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-salesforce%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-salesforce) | | Salesloft | [![source-salesloft](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-salesloft%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-salesloft) | | Sendgrid | [![source-sendgrid](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-sendgrid%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-sendgrid) | +| Sentry | [![source-sentry](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-sentry%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-sentry) | | Shopify | [![source-shopify](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-shopify%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-shopify) | | Slack | [![source-slack](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-slack%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-slack) | | Smartsheets | [![source-smartsheets](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-smartsheets%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-smartsheets) | diff --git a/airbyte-integrations/connectors/source-sentry/.dockerignore b/airbyte-integrations/connectors/source-sentry/.dockerignore new file mode 100644 index 0000000000000..a5ed66c554120 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_sentry +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-sentry/.gitignore b/airbyte-integrations/connectors/source-sentry/.gitignore new file mode 100644 index 0000000000000..d6e830be95797 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/.gitignore @@ -0,0 +1 @@ +.python-version \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-sentry/Dockerfile b/airbyte-integrations/connectors/source-sentry/Dockerfile new file mode 100644 index 0000000000000..f743e7e20a611 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. 
+RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_sentry ./source_sentry + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-sentry diff --git a/airbyte-integrations/connectors/source-sentry/README.md b/airbyte-integrations/connectors/source-sentry/README.md new file mode 100644 index 0000000000000..bb0502804de66 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/README.md @@ -0,0 +1,132 @@ +# Sentry Source + +This is the repository for the Sentry source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/sentry). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/sentry) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_sentry/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source sentry test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-sentry:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-sentry:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-sentry:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-sentry:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-sentry:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-sentry:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. 
Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml b/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml new file mode 100644 index 0000000000000..a36b40014c1b2 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/acceptance-test-config.yml @@ -0,0 +1,18 @@ +connector_image: airbyte/source-sentry:dev +tests: + spec: + - spec_path: "source_sentry/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-sentry/bootstrap.md b/airbyte-integrations/connectors/source-sentry/bootstrap.md new file mode 100644 index 0000000000000..7f1e939a53901 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/bootstrap.md @@ -0,0 +1,16 @@ +## Streams + +Sentry is a REST API. Connector has the following streams, and all of them support full refresh only. + +* [Events](https://docs.sentry.io/api/events/list-a-projects-events/) +* [Issues](https://docs.sentry.io/api/events/list-a-projects-issues/) + +And a [ProjectDetail](https://docs.sentry.io/api/projects/retrieve-a-project/) stream is also implemented just for connection checking. + +## Authentication + +Sentry API offers three types of [authentication methods](https://docs.sentry.io/api/auth/). + +* Auth Token - The most common authentication method in Sentry. Connector only supports this method. +* DSN Authentication - Only some API endpoints support this method. Not supported by this connector. +* API Keys - Keys are passed using HTTP Basic auth, and a legacy means of authenticating. They will still be supported but are disabled for new accounts. Not supported by this connector. 
\ No newline at end of file diff --git a/airbyte-integrations/connectors/source-sentry/build.gradle b/airbyte-integrations/connectors/source-sentry/build.gradle new file mode 100644 index 0000000000000..ba18928d4c3b1 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_sentry' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py b/airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py new file mode 100644 index 0000000000000..108075487440f --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..ed38985229c58 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/configured_catalog.json @@ -0,0 +1,40 @@ +{ + "streams": [ + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "events", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + }, + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "issues", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + }, + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "project_detail", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + }, + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "projects", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + } + } + ] +} diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..7a94e39068564 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/invalid_config.json @@ -0,0 +1,6 @@ +{ + "auth_token": "invalid-token", + "hostname": "sentry.io", + "organization": "invalid-organization", + "project": "invalid-project" +} diff --git a/airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json 
b/airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json new file mode 100644 index 0000000000000..f0e080f4be8f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/integration_tests/sample_config.json @@ -0,0 +1,6 @@ +{ + "auth_token": "token", + "hostname": "sentry.io", + "organization": "organization", + "project": "project" +} diff --git a/airbyte-integrations/connectors/source-sentry/main.py b/airbyte-integrations/connectors/source-sentry/main.py new file mode 100644 index 0000000000000..0bfb3a278aab4 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_sentry import SourceSentry + +if __name__ == "__main__": + source = SourceSentry() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-sentry/requirements.txt b/airbyte-integrations/connectors/source-sentry/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-sentry/setup.py b/airbyte-integrations/connectors/source-sentry/setup.py new file mode 100644 index 0000000000000..ddf6245c4d992 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_sentry", + description="Source implementation for Sentry.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py b/airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py new file mode 100644 index 0000000000000..3435dba8e0726 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceSentry + +__all__ = ["SourceSentry"] diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json new file mode 100644 index 0000000000000..59345c95f7d4e --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/events.json @@ -0,0 +1,72 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "eventID": { + "type": "string" + }, + "tags": { + "type": "array", + "items": { + "type": "object", + "properties": { + "value": { + "type": "string" + }, + "key": { + "type": "string" + } + } + } + }, + "dateCreated": { + "type": "string" + }, + "user": { + "type": ["null", "object"], + "properties": { + "username": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "ip_address": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "data": { + "type": ["null", "object"], + "properties": { + "isStaff": { + "type": "boolean" + } + } + }, + "id": { + "type": "string" + } + } + }, + "message": { + "type": "string" + }, + "id": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "event.type": { + "type": "string" + }, + "groupID": { + "type": "string" + }, + "title": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json new file mode 100644 index 0000000000000..d4814ea21498b --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/issues.json @@ -0,0 +1,133 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "annotations": { + "type": "array", + "items": { + "type": "string" + } + }, + "assignedTo": { + "type": ["null", "object"] + }, + "count": { + "type": "string" + }, + "culprit": { + "type": "string" + }, + "firstSeen": { + "type": "string" + }, + "hasSeen": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isBookmarked": { + "type": "boolean" + }, + "isPublic": { + "type": "boolean" + }, + "isSubscribed": { + "type": "boolean" + }, + "lastSeen": { + "type": "string" + }, + "level": { + "type": "string" + }, + "logger": { + "type": ["null", "string"] + }, + "metadata": { + "anyOf": [ + { + "type": "object", + "properties": { + "title": { + "type": "string" + } + } + }, + { + "type": "object", + "properties": { + "filename": { + "type": "string" + }, + "type": { + "type": "string" + }, + "value": { + "type": "string" + } + } + } + ] + }, + "numComments": { + "type": "integer" + }, + "permalink": { + "type": "string" + }, + "project": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + }, + "shareId": { + "type": ["null", "string"] + }, + "shortId": { + "type": "string" + }, + "stats": { + "type": "object", + "properties": { + "24h": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + }, + "status": { + "type": "string", + "enum": ["resolved", "unresolved", "ignored"] + }, + "statusDetails": { + "type": "object" + }, + "subscriptionDetails": { + "type": ["null", "object"] + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "userCount": { + "type": "integer" + } + } +} diff --git 
a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json new file mode 100644 index 0000000000000..efb12e70ffbc8 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/project_detail.json @@ -0,0 +1,402 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "allowedDomains": { + "type": "array", + "items": { + "type": "string" + } + }, + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "color": { + "type": "string" + }, + "dataScrubber": { + "type": "boolean" + }, + "dataScrubberDefaults": { + "type": "boolean" + }, + "dateCreated": { + "type": "string" + }, + "defaultEnvironment": { + "type": ["null", "string"] + }, + "digestsMaxDelay": { + "type": "integer" + }, + "digestsMinDelay": { + "type": "integer" + }, + "features": { + "type": "array", + "items": { + "type": "string" + } + }, + "firstEvent": { + "type": ["null", "string"] + }, + "hasAccess": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isBookmarked": { + "type": "boolean" + }, + "isInternal": { + "type": "boolean" + }, + "isMember": { + "type": "boolean" + }, + "isPublic": { + "type": "boolean" + }, + "latestRelease": { + "type": ["null", "object"], + "properties": { + "authors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "email": { + "type": "string" + } + } + } + }, + "commitCount": { + "type": "integer" + }, + "data": { + "type": "object" + }, + "dateCreated": { + "type": "string" + }, + "dateReleased": { + "type": ["null", "string"] + }, + "deployCount": { + "type": "integer" + }, + "firstEvent": { + "type": ["null", "string"] + }, + "lastCommit": { + "type": ["null", "object"] + }, + "lastDeploy": { + "type": ["null", "object"] + }, + "lastEvent": { + "type": ["null", "string"] + }, + "newGroups": { + "type": "integer" + }, + "owner": { + "type": ["null", "string"] + }, + "projects": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + } + }, + "ref": { + "type": ["null", "string"] + }, + "shortVersion": { + "type": "string" + }, + "url": { + "type": ["null", "string"] + }, + "version": { + "type": "string" + } + } + }, + "name": { + "type": "string" + }, + "options": { + "type": "object", + "properties": { + "feedback:branding": { + "type": "boolean" + }, + "filters:blacklisted_ips": { + "type": "string" + }, + "filters:error_messages": { + "type": "string" + }, + "filters:releases": { + "type": "string" + }, + "sentry:csp_ignored_sources": { + "type": "string" + }, + "sentry:csp_ignored_sources_defaults": { + "type": "boolean" + }, + "sentry:reprocessing_active": { + "type": "boolean" + } + } + }, + "organization": { + "type": "object", + "properties": { + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "id": { + "type": "string" + }, + "isEarlyAdopter": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "require2FA": { + "type": "boolean" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "object", + "properties": { + "id": { + "type": 
"string" + }, + "name": { + "type": "string" + } + } + } + } + }, + "platform": { + "type": ["null", "string"] + }, + "platforms": { + "type": "array", + "items": { + "type": "string" + } + }, + "plugins": { + "type": "array", + "items": { + "type": "object", + "properties": { + "assets": { + "type": "array", + "items": { + "type": "string" + } + }, + "author": { + "type": ["null", "object"], + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "canDisable": { + "type": "boolean" + }, + "contexts": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "type": "string" + }, + "doc": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "hasConfiguration": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isTestable": { + "type": "boolean" + }, + "metadata": { + "type": "object" + }, + "name": { + "type": "string" + }, + "resourceLinks": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + } + }, + "shortName": { + "type": "string" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string" + }, + "type": { + "type": "string" + }, + "version": { + "type": ["null", "string"] + } + } + } + }, + "processingIssues": { + "type": "integer" + }, + "relayPiiConfig": { + "type": ["null", "string"] + }, + "resolveAge": { + "type": "integer" + }, + "safeFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "scrapeJavaScript": { + "type": "boolean" + }, + "scrubIPAddresses": { + "type": "boolean" + }, + "securityToken": { + "type": "string" + }, + "securityTokenHeader": { + "type": ["null", "string"] + }, + "sensitiveFields": { + "type": "array", + "items": { + "type": "string" + } + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string" + }, + "storeCrashReports": { + "type": ["null", "boolean"] + }, + "subjectPrefix": { + "type": "string" + }, + "subjectTemplate": { + "type": "string" + }, + "team": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + }, + "teams": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + } + } + }, + "verifySSL": { + "type": "boolean" + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json new file mode 100644 index 0000000000000..3656b0b27c2a9 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/schemas/projects.json @@ -0,0 +1,119 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "color": { + "type": "string" + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "features": { + "type": "array", + "items": { + "type": "string" + } + }, + "firstEvent": { + "type": ["null", "string"] + }, + "hasAccess": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "isBookmarked": { + "type": "boolean" + }, + "isInternal": { + "type": "boolean" + }, + "isMember": { + "type": "boolean" + }, + "isPublic": { + "type": 
"boolean" + }, + "name": { + "type": "string" + }, + "organization": { + "type": "object", + "properties": { + "avatar": { + "type": "object", + "properties": { + "avatarType": { + "type": "string" + }, + "avatarUuid": { + "type": ["null", "string"] + } + } + }, + "dateCreated": { + "type": "string", + "format": "date-time" + }, + "id": { + "type": "string" + }, + "isEarlyAdopter": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "require2FA": { + "type": "boolean" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + } + } + }, + "requireEmailVerification": { + "type": "boolean" + }, + "features": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "platform": { + "type": ["null", "string"] + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string", + "enum": ["active", "disabled", "pending_deletion", "deletion_in_progress"] + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/source.py b/airbyte-integrations/connectors/source-sentry/source_sentry/source.py new file mode 100644 index 0000000000000..9eb192da02627 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/source.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from typing import Any, List, Mapping, Tuple + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator + +from .streams import Events, Issues, ProjectDetail, Projects + + +# Source +class SourceSentry(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, Any]: + try: + projects_stream = Projects( + authenticator=TokenAuthenticator(token=config["auth_token"]), + hostname=config.get("hostname"), + ) + next(projects_stream.read_records(sync_mode=SyncMode.full_refresh)) + return True, None + except Exception as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + stream_args = { + "authenticator": TokenAuthenticator(token=config["auth_token"]), + "hostname": config.get("hostname"), + } + project_stream_args = { + **stream_args, + "organization": config["organization"], + "project": config["project"], + } + return [ + Events(**project_stream_args), + Issues(**project_stream_args), + ProjectDetail(**project_stream_args), + Projects(**stream_args), + ] diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/spec.json b/airbyte-integrations/connectors/source-sentry/source_sentry/spec.json new file mode 100644 index 0000000000000..fbfb01e2fcf53 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/spec.json @@ -0,0 +1,34 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/sentry", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Sentry Spec", + "type": "object", + "required": ["auth_token", "organization", "project"], + "additionalProperties": false, + "properties": { + "auth_token": { + "type": "string", + "title": "Authentication tokens", + "description": "Log into Sentry and then create authentication tokens.For self-hosted, you can find or create authentication tokens by visiting \"{instance_url_prefix}/settings/account/api/auth-tokens/\"", + "airbyte_secret": true + }, + "hostname": { + "type": "string", + 
"title": "Host Name", + "description": "Host name of Sentry API server.For self-hosted, specify your host name here. Otherwise, leave it empty.", + "default": "sentry.io" + }, + "organization": { + "type": "string", + "title": "Organization", + "description": "The slug of the organization the groups belong to." + }, + "project": { + "type": "string", + "title": "Project", + "description": "The slug of the project the groups belong to." + } + } + } +} diff --git a/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py b/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py new file mode 100644 index 0000000000000..66ab436156f8a --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/source_sentry/streams.py @@ -0,0 +1,158 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from typing import Any, Iterable, Mapping, MutableMapping, Optional + +import requests +from airbyte_cdk.sources.streams.http import HttpStream + + +class SentryStream(HttpStream, ABC): + API_VERSION = "0" + URL_TEMPLATE = "https://{hostname}/api/{api_version}/" + primary_key = "id" + + def __init__(self, hostname: str, **kwargs): + super().__init__(**kwargs) + self._url_base = self.URL_TEMPLATE.format(hostname=hostname, api_version=self.API_VERSION) + + @property + def url_base(self) -> str: + return self._url_base + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + return None + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + return {} + + +class SentryStreamPagination(SentryStream): + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """ + Expect the link header field to always contain the values ​​for `rel`, `results`, and `cursor`. + If there is actually the next page, rel="next"; results="true"; cursor="". 
+ """ + if response.links["next"]["results"] == "true": + return {"cursor": response.links["next"]["cursor"]} + else: + return None + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + if next_page_token: + params.update(next_page_token) + + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + yield from response.json() + + +class Events(SentryStreamPagination): + """ + Docs: https://docs.sentry.io/api/events/list-a-projects-events/ + """ + + def __init__(self, organization: str, project: str, **kwargs): + super().__init__(**kwargs) + self._organization = organization + self._project = project + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return f"projects/{self._organization}/{self._project}/events/" + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + params.update({"full": "true"}) + + return params + + +class Issues(SentryStreamPagination): + """ + Docs: https://docs.sentry.io/api/events/list-a-projects-issues/ + """ + + def __init__(self, organization: str, project: str, **kwargs): + super().__init__(**kwargs) + self._organization = organization + self._project = project + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return f"projects/{self._organization}/{self._project}/issues/" + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice, next_page_token) + params.update({"statsPeriod": "", "query": ""}) + + return params + + +class Projects(SentryStreamPagination): + """ + Docs: https://docs.sentry.io/api/projects/list-your-projects/ + """ + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return "projects/" + + +class ProjectDetail(SentryStream): + """ + Docs: https://docs.sentry.io/api/projects/retrieve-a-project/ + """ + + def __init__(self, organization: str, project: str, **kwargs): + super().__init__(**kwargs) + self._organization = organization + self._project = project + + def path( + self, + stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + return f"projects/{self._organization}/{self._project}/" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + yield response.json() diff --git a/airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py b/airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py new file mode 100644 index 
0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py b/airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py new file mode 100644 index 0000000000000..03cc8a1144f24 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/unit_tests/test_source.py @@ -0,0 +1,26 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_sentry.source import SourceSentry +from source_sentry.streams import Projects + + +def test_check_connection(mocker): + source = SourceSentry() + logger_mock, config_mock = MagicMock(), MagicMock() + mocker.patch.object(Projects, "read_records", return_value=iter([{"id": "1", "name": "test"}])) + assert source.check_connection(logger_mock, config_mock) == (True, None) + + +def test_streams(mocker): + source = SourceSentry() + config_mock = MagicMock() + config_mock["auth_token"] = "test-token" + config_mock["organization"] = "test-organization" + config_mock["project"] = "test-project" + streams = source.streams(config_mock) + expected_streams_number = 4 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py new file mode 100644 index 0000000000000..2417953ac0400 --- /dev/null +++ b/airbyte-integrations/connectors/source-sentry/unit_tests/test_streams.py @@ -0,0 +1,110 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +import pytest +from source_sentry.streams import Events, Issues, ProjectDetail, Projects, SentryStreamPagination + +INIT_ARGS = {"hostname": "sentry.io", "organization": "test-org", "project": "test-project"} + + +@pytest.fixture +def patch_base_class(mocker): + # Mock abstract methods to enable instantiating abstract class + mocker.patch.object(SentryStreamPagination, "path", "test_endpoint") + mocker.patch.object(SentryStreamPagination, "__abstractmethods__", set()) + + +def test_next_page_token(patch_base_class): + stream = SentryStreamPagination(hostname="sentry.io") + resp = MagicMock() + cursor = "next_page_num" + resp.links = {"next": {"results": "true", "cursor": cursor}} + inputs = {"response": resp} + expected_token = {"cursor": cursor} + assert stream.next_page_token(**inputs) == expected_token + + +def test_next_page_token_is_none(patch_base_class): + stream = SentryStreamPagination(hostname="sentry.io") + resp = MagicMock() + resp.links = {"next": {"results": "false", "cursor": "no_next"}} + inputs = {"response": resp} + expected_token = None + assert stream.next_page_token(**inputs) == expected_token + + +def next_page_token_inputs(): + links_headers = [ + {}, + {"next": {}}, + ] + responses = [MagicMock() for _ in links_headers] + for mock, header in zip(responses, links_headers): + mock.links = header + + return responses + + +@pytest.mark.parametrize("response", next_page_token_inputs()) +def test_next_page_token_raises(patch_base_class, response): + stream = SentryStreamPagination(hostname="sentry.io") + inputs = {"response": response} + with pytest.raises(KeyError): + stream.next_page_token(**inputs) + + +def test_events_path(): + stream = Events(**INIT_ARGS) + expected = "projects/test-org/test-project/events/" + 
assert stream.path() == expected + + +def test_issues_path(): + stream = Issues(**INIT_ARGS) + expected = "projects/test-org/test-project/issues/" + assert stream.path() == expected + + +def test_projects_path(): + stream = Projects(hostname="sentry.io") + expected = "projects/" + assert stream.path() == expected + + +def test_project_detail_path(): + stream = ProjectDetail(**INIT_ARGS) + expected = "projects/test-org/test-project/" + assert stream.path() == expected + + +def test_sentry_stream_pagination_request_params(patch_base_class): + stream = SentryStreamPagination(hostname="sentry.io") + expected = {"cursor": "next-page"} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_events_request_params(): + stream = Events(**INIT_ARGS) + expected = {"cursor": "next-page", "full": "true"} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_issues_request_params(): + stream = Issues(**INIT_ARGS) + expected = {"cursor": "next-page", "statsPeriod": "", "query": ""} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_projects_request_params(): + stream = Projects(hostname="sentry.io") + expected = {"cursor": "next-page"} + assert stream.request_params(stream_state=None, next_page_token={"cursor": "next-page"}) == expected + + +def test_project_detail_request_params(): + stream = ProjectDetail(**INIT_ARGS) + expected = {} + assert stream.request_params(stream_state=None, next_page_token=None) == expected diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 8309718c31801..a4da56377cf34 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -112,6 +112,7 @@ * [Salesforce](integrations/sources/salesforce.md) * [SalesLoft](integrations/sources/salesloft.md) * [Sendgrid](integrations/sources/sendgrid.md) + * [Sentry](integrations/sources/sentry.md) * [Shopify](integrations/sources/shopify.md) * [Shortio](integrations/sources/shortio.md) * [Slack](integrations/sources/slack.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 4b6086e77f16d..c3c783e0dcbea 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -96,6 +96,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [Salesloft](./sources/salesloft.md)| Alpha | | [SAP Business One](sources/sap-business-one.md) | Beta | | [Sendgrid](sources/sendgrid.md) | Certified | +| [Sentry](sources/sentry.md) | Alpha | | [Shopify](sources/shopify.md) | Certified | | [Short.io](sources/shortio.md) | Beta | | [Slack](sources/slack.md) | Beta | diff --git a/docs/integrations/sources/sentry.md b/docs/integrations/sources/sentry.md new file mode 100644 index 0000000000000..d0e07ac29af21 --- /dev/null +++ b/docs/integrations/sources/sentry.md @@ -0,0 +1,49 @@ +# Sentry + +## Sync overview + +This source can sync data for the [Sentry API](https://docs.sentry.io/api/). It supports only Full Refresh syncs. 
+ +### Output schema + +This Source is capable of syncing the following Streams: + +* [Events](https://docs.sentry.io/api/events/list-a-projects-events/) +* [Issues](https://docs.sentry.io/api/events/list-a-projects-issues/) + +### Data type mapping + +| Integration Type | Airbyte Type | Notes | +| :--- | :--- | :--- | +| `string` | `string` | | +| `integer`, `number` | `number` | | +| `array` | `array` | | +| `object` | `object` | | + +### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | :--- | :--- | +| Full Refresh Sync | Yes | | +| Incremental Sync | No | | +| SSL connection | Yes | | +| Namespaces | No | | + +## Getting started + +### Requirements + +* `auth_token` - Sentry Authentication Token with the necessary permissions \(described below\) +* `organization` - Organization Slug. You can check it at https://sentry.io/settings// +* `project` - The name of the Project you want to sync. You can list it from https://sentry.io/settings//projects/ +* `hostname` - Host name of Sentry API server. For self-hosted, specify your host name here. Otherwise, leave it empty. \(default: sentry.io\) + +### Setup guide + +You can find or create authentication tokens within [Sentry](https://sentry.io/settings/account/api/auth-tokens/). + +## Changelog + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2021-10-12 | [6975](https://github.com/airbytehq/airbyte/pull/6975) | New Source: Sentry | From 8ddf686e9e39c8b49b115c06272fff452b758306 Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Tue, 2 Nov 2021 19:55:58 +0530 Subject: [PATCH 02/83] added credentials to CI for new source sentry * add credentials to CI for new source sentry * add ci cred --- .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + tools/bin/ci_credentials.sh | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 79f3f90f128bf..40a166ff64418 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -198,6 +198,7 @@ jobs: SOURCE_PAYSTACK_TEST_CREDS: ${{ secrets.SOURCE_PAYSTACK_TEST_CREDS }} SOURCE_DELIGHTED_TEST_CREDS: ${{ secrets.SOURCE_DELIGHTED_TEST_CREDS }} SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} + SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} - run: | echo "$SPEC_CACHE_SERVICE_ACCOUNT_KEY" > spec_cache_key_file.json && docker login -u airbytebot -p ${DOCKER_PASSWORD} ./tools/integrations/manage.sh publish airbyte-integrations/${{ github.event.inputs.connector }} ${{ github.event.inputs.run-tests }} --publish_spec_to_cache diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 4e984b646c11c..674d8657388c1 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -193,6 +193,7 @@ jobs: SOURCE_PAYSTACK_TEST_CREDS: ${{ secrets.SOURCE_PAYSTACK_TEST_CREDS }} SOURCE_DELIGHTED_TEST_CREDS: ${{ secrets.SOURCE_DELIGHTED_TEST_CREDS }} SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} + SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} - run: | ./tools/bin/ci_integration_test.sh ${{ github.event.inputs.connector }} name: test ${{ github.event.inputs.connector }} diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 1a69179826e8b..9b1dd92577a4f 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -143,6 +143,7 @@ write_standard_creds 
source-snowflake "$SNOWFLAKE_INTEGRATION_TEST_CREDS" "confi write_standard_creds source-square "$SOURCE_SQUARE_CREDS" write_standard_creds source-strava "$SOURCE_STRAVA_TEST_CREDS" write_standard_creds source-paystack "$SOURCE_PAYSTACK_TEST_CREDS" +write_standard_creds source-sentry "$SOURCE_SENTRY_TEST_CREDS" write_standard_creds source-stripe "$SOURCE_STRIPE_CREDS" write_standard_creds source-stripe "$STRIPE_INTEGRATION_CONNECTED_ACCOUNT_TEST_CREDS" "connected_account_config.json" write_standard_creds source-surveymonkey "$SURVEYMONKEY_TEST_CREDS" From 3928c0c7e7739cc59ef1229421ed86b0c022b666 Mon Sep 17 00:00:00 2001 From: "Sherif A. Nada" Date: Tue, 2 Nov 2021 08:00:26 -0700 Subject: [PATCH 03/83] Delete .vscode directory --- .vscode/settings.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 4f81299a37cfc..0000000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "java.configuration.updateBuildConfiguration": "automatic" -} From fd388139fc04eb9a76d9e5b741b0465231ae052c Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Tue, 2 Nov 2021 21:13:29 +0530 Subject: [PATCH 04/83] fix: onesignal is duplicated (#7565) --- .../init/src/main/resources/seed/source_definitions.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 47ffb0f36f3d6..51b1341af1b05 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -601,11 +601,6 @@ dockerRepository: airbyte/source-sentry dockerImageTag: 0.1.0 documentationUrl: https://docs.airbyte.io/integrations/sources/sentry -- sourceDefinitionId: bb6afd81-87d5-47e3-97c4-e2c2901b1cf8 - name: OneSignal - dockerRepository: airbyte/source-onesignal - dockerImageTag: 0.1.0 - documentationUrl: https://docs.airbyte.io/integrations/sources/lever-onesignal - name: Zoom sourceDefinitionId: aea2fd0d-377d-465e-86c0-4fdc4f688e51 dockerRepository: airbyte/source-zoom-singer From 7cf31ea50cf02fb91deb4d93c344daf8d93601d1 Mon Sep 17 00:00:00 2001 From: oneshcheret <33333155+sashaNeshcheret@users.noreply.github.com> Date: Tue, 2 Nov 2021 17:46:42 +0200 Subject: [PATCH 05/83] =?UTF-8?q?=F0=9F=90=9B=20Fix=20data=20type=20tests?= =?UTF-8?q?=20in=20CdcPostgresSourceDatatypeTest=20(#7339)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix data type tests in CdcPostgresSourceComprehensiveTest * update style format * bump version for postgres source * bump version for postgres source in json definition * remove unnecessary comments from test and bump version for postgres strict encrypt source * resolved potential conflicts with debezium utils in mssql converter implementation * resolved potential conflicts with debezium utils in mssql converter implementation * Update notes for money type in postgres.md Co-authored-by: Sherif A. Nada * Update docs/integrations/sources/postgres.md Co-authored-by: Sherif A. Nada * added test cases for converting data values for postgres cdc, remove time zone utc from test container * remove redundant void message from test Co-authored-by: Sherif A. 
Nada * update style format * fix time zone in DebeziumConverterUtilsTest * set utc time zone in DataTypeUtils * set utc time zone for date format * revert changes regarding timezone in date format, disable tests with number and duration Co-authored-by: Sherif A. Nada --- .../decd338e-5647-4c0b-adf4-da0e75f5a750.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../internals/DebeziumConverterUtils.java | 66 ++++ .../debezium/internals/MySQLConverter.java | 49 +-- .../debezium/internals/PostgresConverter.java | 132 ++++++++ .../internals/DebeziumConverterUtilsTest.java | 102 ++++++ .../source-postgres-strict-encrypt/Dockerfile | 2 +- .../connectors/source-postgres/Dockerfile | 2 +- .../postgres/PostgresCdcProperties.java | 3 + .../CdcPostgresSourceAcceptanceTest.java | 1 + .../CdcPostgresSourceDatatypeTest.java | 319 ++++++++---------- docs/integrations/sources/postgres.md | 3 +- 12 files changed, 463 insertions(+), 220 deletions(-) create mode 100644 airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java create mode 100644 airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java create mode 100644 airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json index 1a952019a996f..2fd2cdf5d5d98 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/decd338e-5647-4c0b-adf4-da0e75f5a750.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "decd338e-5647-4c0b-adf4-da0e75f5a750", "name": "Postgres", "dockerRepository": "airbyte/source-postgres", - "dockerImageTag": "0.3.9", + "dockerImageTag": "0.3.13", "documentationUrl": "https://docs.airbyte.io/integrations/sources/postgres", "icon": "postgresql.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 51b1341af1b05..9915573cc9337 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -405,7 +405,7 @@ - name: Postgres sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 dockerRepository: airbyte/source-postgres - dockerImageTag: 0.3.11 + dockerImageTag: 0.3.13 documentationUrl: https://docs.airbyte.io/integrations/sources/postgres icon: postgresql.svg sourceType: database diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java new file mode 100644 index 0000000000000..a97694727ed3d --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtils.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.debezium.internals; + +import io.airbyte.db.DataTypeUtils; +import io.debezium.spi.converter.RelationalColumn; +import java.sql.Timestamp; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class DebeziumConverterUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(DebeziumConverterUtils.class); + + private DebeziumConverterUtils() { + throw new UnsupportedOperationException(); + } + + public static String convertDate(final Object input) { + /** + * While building this custom converter we were not sure what type debezium could return cause there + * is no mention of it in the documentation. Secondly if you take a look at + * {@link io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter#converterFor(io.debezium.spi.converter.RelationalColumn, io.debezium.spi.converter.CustomConverter.ConverterRegistration)} + * method, even it is handling multiple data types but its not clear under what circumstances which + * data type would be returned. I just went ahead and handled the data types that made sense. + * Secondly, we use LocalDateTime to handle this cause it represents DATETIME datatype in JAVA + */ + if (input instanceof LocalDateTime) { + return DataTypeUtils.toISO8601String((LocalDateTime) input); + } else if (input instanceof LocalDate) { + return DataTypeUtils.toISO8601String((LocalDate) input); + } else if (input instanceof Duration) { + return DataTypeUtils.toISO8601String((Duration) input); + } else if (input instanceof Timestamp) { + return DataTypeUtils.toISO8601String(((Timestamp) input).toLocalDateTime()); + } else if (input instanceof Number) { + return DataTypeUtils.toISO8601String( + new Timestamp(((Number) input).longValue()).toLocalDateTime()); + } else if (input instanceof String) { + try { + return LocalDateTime.parse((String) input).toString(); + } catch (final DateTimeParseException e) { + LOGGER.warn("Cannot convert value '{}' to LocalDateTime type", input); + return input.toString(); + } + } + LOGGER.warn("Uncovered date class type '{}'. 
Use default converter", input.getClass().getName()); + return input.toString(); + } + + public static Object convertDefaultValue(RelationalColumn field) { + if (field.isOptional()) { + return null; + } else if (field.hasDefaultValue()) { + return field.defaultValue(); + } + return null; + } + +} diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java index 0b80d39c9c0ab..24af44c5af1ab 100644 --- a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MySQLConverter.java @@ -4,14 +4,8 @@ package io.airbyte.integrations.debezium.internals; -import io.airbyte.db.DataTypeUtils; import io.debezium.spi.converter.CustomConverter; import io.debezium.spi.converter.RelationalColumn; -import java.sql.Timestamp; -import java.time.Duration; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.format.DateTimeParseException; import java.util.Arrays; import java.util.Properties; import org.apache.kafka.connect.data.SchemaBuilder; @@ -61,50 +55,15 @@ private void registerText(final RelationalColumn field, final ConverterRegistrat if (x instanceof byte[]) { return new String((byte[]) x); - } else + } else { return x.toString(); + } }); } private void registerDate(final RelationalColumn field, final ConverterRegistration registration) { - registration.register(SchemaBuilder.string(), x -> { - if (x == null) { - if (field.isOptional()) { - return null; - } else if (field.hasDefaultValue()) { - return field.defaultValue(); - } - return null; - } - /** - * While building this custom converter we were not sure what type debezium could return cause there - * is no mention of it in the documentation. Secondly if you take a look at - * {@link io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter#converterFor(RelationalColumn, ConverterRegistration)} - * method, even it is handling multiple data types but its not clear under what circumstances which - * data type would be returned. I just went ahead and handled the data types that made sense. - * Secondly, we use LocalDateTime to handle this cause it represents DATETIME datatype in JAVA - */ - if (x instanceof LocalDateTime) { - return DataTypeUtils.toISO8601String((LocalDateTime) x); - } else if (x instanceof LocalDate) { - return DataTypeUtils.toISO8601String((LocalDate) x); - } else if (x instanceof Duration) { - return DataTypeUtils.toISO8601String((Duration) x); - } else if (x instanceof Timestamp) { - return DataTypeUtils.toISO8601String(((Timestamp) x).toLocalDateTime()); - } else if (x instanceof Number) { - return DataTypeUtils.toISO8601String(new Timestamp(((Number) x).longValue()).toLocalDateTime()); - } else if (x instanceof String) { - try { - return LocalDateTime.parse((String) x).toString(); - } catch (final DateTimeParseException e) { - LOGGER.warn("Cannot convert value '{}' to LocalDateTime type", x); - return x.toString(); - } - } - LOGGER.warn("Uncovered date class type '{}'. Use default converter", x.getClass().getName()); - return x.toString(); - }); + registration.register(SchemaBuilder.string(), + x -> x == null ? 
DebeziumConverterUtils.convertDefaultValue(field) : DebeziumConverterUtils.convertDate(x)); } } diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java new file mode 100644 index 0000000000000..dc45ee017e47b --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/PostgresConverter.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.debezium.internals; + +import io.debezium.spi.converter.CustomConverter; +import io.debezium.spi.converter.RelationalColumn; +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Properties; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.postgresql.util.PGInterval; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PostgresConverter implements CustomConverter { + + private static final Logger LOGGER = LoggerFactory.getLogger(PostgresConverter.class); + + private final String[] DATE_TYPES = {"DATE", "DATETIME", "TIME", "TIMETZ", "INTERVAL", "TIMESTAMP"}; + private final String[] BIT_TYPES = {"BIT", "VARBIT"}; + private final String[] MONEY_ITEM_TYPE = {"MONEY"}; + private final String[] GEOMETRICS_TYPES = {"BOX", "CIRCLE", "LINE", "LSEG", "POINT", "POLYGON", "PATH"}; + private final String[] TEXT_TYPES = {"VARCHAR", "VARBINARY", "BLOB", "TEXT", "LONGTEXT", "TINYTEXT", "MEDIUMTEXT", "INVENTORY_ITEM", "TSVECTOR"}; + + @Override + public void configure(Properties props) {} + + @Override + public void converterFor(RelationalColumn field, ConverterRegistration registration) { + if (Arrays.stream(DATE_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) { + registerDate(field, registration); + } else if (Arrays.stream(TEXT_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName())) + || Arrays.stream(GEOMETRICS_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName())) + || Arrays.stream(BIT_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) { + registerText(field, registration); + } else if (Arrays.stream(MONEY_ITEM_TYPE).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) { + registerMoney(field, registration); + } + } + + private void registerText(RelationalColumn field, ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), x -> { + if (x == null) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + if (x instanceof byte[]) { + return new String((byte[]) x); + } else { + return x.toString(); + } + }); + } + + private void registerDate(RelationalColumn field, ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), x -> { + if (x == null) { + return DebeziumConverterUtils.convertDefaultValue(field); + } else if (x instanceof PGInterval) { + return convertInterval((PGInterval) x); + } else { + return DebeziumConverterUtils.convertDate(x); + } + }); + } + + private String convertInterval(PGInterval pgInterval) { + StringBuilder resultInterval = new StringBuilder(); + formatDateUnit(resultInterval, pgInterval.getYears(), " year "); + formatDateUnit(resultInterval, pgInterval.getMonths(), " mons "); + formatDateUnit(resultInterval, pgInterval.getDays(), " days "); + + formatTimeValues(resultInterval, pgInterval); + return resultInterval.toString(); + } + + private void registerMoney(RelationalColumn 
field, ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), x -> { + if (x == null) { + return DebeziumConverterUtils.convertDefaultValue(field); + } else if (x instanceof Double) { + BigDecimal result = BigDecimal.valueOf((Double) x); + if (result.compareTo(new BigDecimal("999999999999999")) == 1 + || result.compareTo(new BigDecimal("-999999999999999")) == -1) { + return null; + } + return result.toString(); + } else { + return x.toString(); + } + }); + } + + private void formatDateUnit(StringBuilder resultInterval, int dateUnit, String s) { + if (dateUnit != 0) { + resultInterval + .append(dateUnit) + .append(s); + } + } + + private void formatTimeValues(StringBuilder resultInterval, PGInterval pgInterval) { + if (isNegativeTime(pgInterval)) { + resultInterval.append("-"); + } + // TODO check if value more or less than Integer.MIN_VALUE Integer.MAX_VALUE, + int hours = Math.abs(pgInterval.getHours()); + int minutes = Math.abs(pgInterval.getMinutes()); + int seconds = Math.abs(pgInterval.getWholeSeconds()); + resultInterval.append(addFirstDigit(hours)); + resultInterval.append(hours); + resultInterval.append(":"); + resultInterval.append(addFirstDigit(minutes)); + resultInterval.append(minutes); + resultInterval.append(":"); + resultInterval.append(addFirstDigit(seconds)); + resultInterval.append(seconds); + } + + private String addFirstDigit(int hours) { + return hours <= 9 ? "0" : ""; + } + + private boolean isNegativeTime(PGInterval pgInterval) { + return pgInterval.getHours() < 0 + || pgInterval.getMinutes() < 0 + || pgInterval.getWholeSeconds() < 0; + } + +} diff --git a/airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java b/airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java new file mode 100644 index 0000000000000..a82990a2c6bb6 --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/test/java/io/airbyte/integrations/debezium/internals/DebeziumConverterUtilsTest.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.debezium.internals; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.debezium.spi.converter.RelationalColumn; +import java.sql.Timestamp; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +class DebeziumConverterUtilsTest { + + @Test + public void convertDefaultValueTest() { + + RelationalColumn relationalColumn = mock(RelationalColumn.class); + + when(relationalColumn.isOptional()).thenReturn(true); + Object actualColumnDefaultValue = DebeziumConverterUtils.convertDefaultValue(relationalColumn); + Assertions.assertNull(actualColumnDefaultValue, "Default value for optional relational column should be null"); + + when(relationalColumn.isOptional()).thenReturn(false); + when(relationalColumn.hasDefaultValue()).thenReturn(false); + actualColumnDefaultValue = DebeziumConverterUtils.convertDefaultValue(relationalColumn); + Assertions.assertNull(actualColumnDefaultValue); + + when(relationalColumn.isOptional()).thenReturn(false); + when(relationalColumn.hasDefaultValue()).thenReturn(true); + String expectedColumnDefaultValue = "default value"; + when(relationalColumn.defaultValue()).thenReturn(expectedColumnDefaultValue); + actualColumnDefaultValue = DebeziumConverterUtils.convertDefaultValue(relationalColumn); + Assertions.assertEquals(actualColumnDefaultValue, expectedColumnDefaultValue); + } + + @Test + public void convertLocalDate() { + LocalDate localDate = LocalDate.of(2021, 1, 1); + + String actual = DebeziumConverterUtils.convertDate(localDate); + Assertions.assertEquals("2021-01-01T00:00:00Z", actual); + } + + @Test + public void convertTLocalTime() { + LocalTime localTime = LocalTime.of(8, 1, 1); + String actual = DebeziumConverterUtils.convertDate(localTime); + Assertions.assertEquals("08:01:01", actual); + } + + @Test + public void convertLocalDateTime() { + LocalDateTime localDateTime = LocalDateTime.of(2021, 1, 1, 8, 1, 1); + + String actual = DebeziumConverterUtils.convertDate(localDateTime); + Assertions.assertEquals("2021-01-01T08:01:01Z", actual); + } + + @Test + @Disabled + public void convertDuration() { + Duration duration = Duration.ofHours(100_000); + + String actual = DebeziumConverterUtils.convertDate(duration); + Assertions.assertEquals("1981-05-29T20:00:00Z", actual); + } + + @Test + public void convertTimestamp() { + LocalDateTime localDateTime = LocalDateTime.of(2021, 1, 1, 8, 1, 1); + Timestamp timestamp = Timestamp.valueOf(localDateTime); + + String actual = DebeziumConverterUtils.convertDate(timestamp); + Assertions.assertEquals("2021-01-01T08:01:01Z", actual); + } + + @Test + @Disabled + public void convertNumber() { + Number number = 100_000; + + String actual = DebeziumConverterUtils.convertDate(number); + Assertions.assertEquals("1970-01-01T03:01:40Z", actual); + } + + @Test + public void convertStringDateFormat() { + String stringValue = "2021-01-01T00:00:00Z"; + + String actual = DebeziumConverterUtils.convertDate(stringValue); + Assertions.assertEquals("2021-01-01T00:00:00Z", actual); + } + +} diff --git a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile index ae28a1f58fdb6..183fd4031a844 100644 --- a/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile +++ 
b/airbyte-integrations/connectors/source-postgres-strict-encrypt/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-postgres-strict-encrypt diff --git a/airbyte-integrations/connectors/source-postgres/Dockerfile b/airbyte-integrations/connectors/source-postgres/Dockerfile index e671b877ce94f..7702449dd508f 100644 --- a/airbyte-integrations/connectors/source-postgres/Dockerfile +++ b/airbyte-integrations/connectors/source-postgres/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.3.11 +LABEL io.airbyte.version=0.3.13 LABEL io.airbyte.name=airbyte/source-postgres diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java index eeb22b57b8a0c..8cda9da3468a7 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresCdcProperties.java @@ -20,6 +20,9 @@ static Properties getDebeziumProperties(final JsonNode config) { props.setProperty("publication.autocreate.mode", "disabled"); + props.setProperty("converters", "datetime"); + props.setProperty("datetime.type", "io.airbyte.integrations.debezium.internals.PostgresConverter"); + return props; } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java index c05ac40e69f5b..62124ae28e375 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java @@ -70,6 +70,7 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc .put("username", container.getUsername()) .put("password", container.getPassword()) .put("replication_method", replicationMethod) + .put("ssl", false) .build()); final Database database = Databases.createDatabase( diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java index f1bbfd347bcb7..38f6ae3e47627 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java +++ 
b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceDatatypeTest.java @@ -17,10 +17,6 @@ import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.utility.MountableFile; -/** - * None of the tests in this class use the cdc path (run the tests and search for `using CDC: false` - * in logs). This is exact same as {@link PostgresSourceAcceptanceTest} - */ public class CdcPostgresSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { private static final String SLOT_NAME_BASE = "debezium_slot"; @@ -54,6 +50,7 @@ protected Database setupDatabase() throws Exception { .put("username", container.getUsername()) .put("password", container.getPassword()) .put("replication_method", replicationMethod) + .put("ssl", false) .build()); final Database database = Databases.createDatabase( @@ -138,25 +135,23 @@ protected void initTests() { .addExpectedValues("1", "32767", "0", "-32767") .build()); - // BUG https://github.com/airbytehq/airbyte/issues/3932 - // BIT type is currently parsed as a Boolean which is incorrect - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("bit") - // .fullSourceDataType("BIT(3)") - // .airbyteType(JsonSchemaPrimitive.NUMBER) - // .addInsertValues("B'101'") - // //.addExpectedValues("101") - // - .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bit") + .fullSourceDataType("BIT(3)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .addInsertValues("B'101'", "B'111'", "null") + .addExpectedValues("101", "111", null) + .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("bit_varying") - // .fullSourceDataType("BIT VARYING(5)") - // .airbyteType(JsonSchemaPrimitive.NUMBER) - // .addInsertValues("B'101'", "null") - // .addExpectedValues("101", null) - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bit_varying") + .fullSourceDataType("BIT VARYING(5)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .addInsertValues("B'101'", "null") + .addExpectedValues("101", null) + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -222,36 +217,28 @@ protected void initTests() { "128.1.0.0/16", "2001:4f8:3:ba::/64") .build()); - // JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" so it doesnt suppose to handle BC - // dates - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("date") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'1999-01-08'", "null") // "'199-10-10 BC'" - // .addExpectedValues("1999-01-08T00:00:00Z", null) // , "199-10-10 BC") - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("date") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'January 7, 1999'", "'1999-01-08'", "'1/9/1999'", "'January 10, 99 BC'", "'January 11, 99 AD'", "null") + .addExpectedValues("1999-01-07", "1999-01-08", "1999-01-09", "0099-01-10", "1999-01-11", null) + .build()); - // Values "'-Infinity'", "'Infinity'", "'Nan'" will not be parsed due to: - // JdbcUtils -> setJsonField contains: - // case FLOAT, DOUBLE -> o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); addDataTypeTestData( TestDataHolder.builder() .sourceType("float8") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("'123'", "'1234567890.1234567'", "null") - .addExpectedValues("123.0", "1.2345678901234567E9", null) + .addInsertValues("'123'", "'1234567890.1234567'", 
"'-Infinity'", "'Infinity'", "'NaN'", "null") + .addExpectedValues("123.0", "1.2345678901234567E9", "-Infinity", "Infinity", "NaN", null) .build()); - // Values "'-Infinity'", "'Infinity'", "'Nan'" will not be parsed due to: - // JdbcUtils -> setJsonField contains: - // case FLOAT, DOUBLE -> o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); addDataTypeTestData( TestDataHolder.builder() .sourceType("float") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("'123'", "'1234567890.1234567'", "null") - .addExpectedValues("123.0", "1.2345678901234567E9", null) + .addInsertValues("'123'", "'1234567890.1234567'", "'-Infinity'", "'Infinity'", "'NaN'", "null") + .addExpectedValues("123.0", "1.2345678901234567E9", "-Infinity", "Infinity", "NaN", null) .build()); addDataTypeTestData( @@ -270,13 +257,15 @@ protected void initTests() { .addExpectedValues(null, "-2147483648", "2147483647") .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("interval") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("null", "'P1Y2M3DT4H5M6S'", "'-178000000'", "'178000000'") - // .addExpectedValues(null, "1 year 2 mons 3 days 04:05:06", "-49444:26:40", "49444:26:40") - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("interval") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("null", "'P1Y2M3DT4H5M6S'", "'PT4H5M6S'", "'-300'", "'-178000000'", + "'178000000'", "'1-2'", "'3 4:05:06'", "'P0002-02-03T04:05:06'") + .addExpectedValues(null, "1 year 2 mons 3 days 04:05:06", "04:05:06", "-00:05:00", "-49444:26:40", + "49444:26:40", "1 year 2 mons 00:00:00", "3 days 04:05:06", "2 year 2 mons 3 days 04:05:06") + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -313,39 +302,35 @@ protected void initTests() { "08:00:2b:01:02:03:04:07") .build()); - // The Money type fails when amount is > 1,000. 
in JdbcUtils-> rowToJson as r.getObject(i); - // Bad value for type double : 1,000.01 - // The reason is that in jdbc implementation money type is tried to get as Double (jdbc - // implementation) - // Max values for Money type: "-92233720368547758.08", "92233720368547758.07" - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("money") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("null", "'999.99'") - // .addExpectedValues(null, "999.99") - // .build()); + // Max values for Money type should be: "-92233720368547758.08", "92233720368547758.07", + // debezium return rounded value for values more than 999999999999999 and less than + // -999999999999999, + // we map these value as null; + // opened issue https://github.com/airbytehq/airbyte/issues/7338 + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("money") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("null", "'999.99'", "'1,000.01'", "'-999999999999.99'", "'-999999999999999'", "'999999999999.99'", "'999999999999999'", + "'-92233720368547758.08'", "'92233720368547758.07'") + .addExpectedValues(null, "999.99", "1000.01", "-999999999999.99", "-999999999999999", "999999999999.99", "999999999999999", + null, null) + .build()); - // The numeric type in Postres may contain 'Nan' type, but in JdbcUtils-> rowToJson - // we try to map it like this, so it fails - // case NUMERIC, DECIMAL -> o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); addDataTypeTestData( TestDataHolder.builder() .sourceType("numeric") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("'99999'", "null") - .addExpectedValues("99999", null) + .addInsertValues("'99999'", "'NAN'", null) + .addExpectedValues("99999", "NAN", null) .build()); - // The numeric type in Postres may contain 'Nan' type, but in JdbcUtils-> rowToJson - // we try to map it like this, so it fails - // case NUMERIC, DECIMAL -> o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); addDataTypeTestData( TestDataHolder.builder() .sourceType("decimal") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("99999", "5.1", "0", "null") - .addExpectedValues("99999", "5.1", "0", null) + .addInsertValues("99999", "5.1", "0", "'NAN'", "null") + .addExpectedValues("99999", "5.1", "0", "NAN", null) .build()); addDataTypeTestData( @@ -353,8 +338,8 @@ protected void initTests() { .sourceType("numeric") .fullSourceDataType("numeric(13,4)") .airbyteType(JsonSchemaPrimitive.NUMBER) - .addInsertValues("0.1880", "10.0000", "5213.3468", "null") - .addExpectedValues("0.1880", "10.0000", "5213.3468", null) + .addInsertValues("0.1880", "10.0000", "5213.3468", "'NAN'", "null") + .addExpectedValues("0.1880", "10.0000", "5213.3468", "NAN", null) .build()); addDataTypeTestData( @@ -374,51 +359,45 @@ protected void initTests() { .addExpectedValues("a", "abc", "Миші йдуть;", "櫻花分店", "", null, "\\xF0\\x9F\\x9A\\x80") .build()); - // JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" for both Date and Time types. - // So Time only (04:05:06) would be represented like "1970-01-01T04:05:06Z" which is incorrect addDataTypeTestData( TestDataHolder.builder() .sourceType("time") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() + .addInsertValues("null", "'04:05:06'", "'2021-04-12 05:06:07'", "'04:05 PM'") + .addExpectedValues(null, "04:05:06", "05:06:07", "16:05:00") .build()); - // JdbcUtils-> DATE_FORMAT is set as ""yyyy-MM-dd'T'HH:mm:ss'Z'"" for both Date and Time types. 
- // So Time only (04:05:06) would be represented like "1970-01-01T04:05:06Z" which is incorrect addDataTypeTestData( TestDataHolder.builder() .sourceType("timetz") .airbyteType(JsonSchemaPrimitive.STRING) - .addInsertValues("null") - .addNullExpectedValue() - .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("timestamp") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("TIMESTAMP '2004-10-19 10:23:54'", "null") - // .addExpectedValues("2004-10-19T10:23:54Z", null) - // .build()); - - // May be run locally, but correct the timezone aacording to your location - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("timestamptz") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("TIMESTAMP '2004-10-19 10:23:54+02'", "null") - // .addExpectedValues("2004-10-19T07:23:54Z", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("tsvector") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("to_tsvector('The quick brown fox jumped over the lazy dog.')") - // .addExpectedValues( - // "'brown':3 'dog':9 'fox':4 'jumped':5 'lazy':8 'over':6 'quick':2 'the':1,7") - // .build()); + .addInsertValues("null", "'04:05:06+03'", "'2021-04-12 05:06:07+00'", "'060708-03'") + .addExpectedValues(null, "04:05:06+03", "05:06:07+00", "06:07:08-03") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("timestamp") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("TIMESTAMP '2004-10-19 10:23:54'", "null") + .addExpectedValues("2004-10-19T10:23:54Z", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("timestamptz") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+03'", "null") + .addExpectedValues("2004-10-19T07:23:54Z", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("tsvector") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("to_tsvector('The quick brown fox jumped over the lazy dog.')") + .addExpectedValues("'brown':3 'dog':9 'fox':4 'jumped':5 'lazy':8 'over':6 'quick':2 'the':1,7") + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -456,13 +435,13 @@ protected void initTests() { .addExpectedValues("[\"10000\",\"10000\",\"10000\",\"10000\"]", null) .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("inventory_item") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("ROW('fuzzy dice', 42, 1.99)", "null") - // .addExpectedValues("(\"fuzzy dice\",42,1.99)", null) - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("inventory_item") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("ROW('fuzzy dice', 42, 1.99)", "null") + .addExpectedValues("(\"fuzzy dice\",42,1.99)", null) + .build()); addDataTypeTestData( TestDataHolder.builder() @@ -472,62 +451,62 @@ protected void initTests() { .addExpectedValues("(\"2010-01-01 14:30:00\",\"2010-01-01 15:30:00\")", null) .build()); - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("box") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") - // .addExpectedValues("(15,18),(3,7)", "(0,0),(0,0)", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("circle") - // .airbyteType(JsonSchemaPrimitive.STRING) - // 
.addInsertValues("'(5,7),10'", "'(0,0),0'", "'(-10,-4),10'", "null") - // .addExpectedValues("<(5,7),10>", "<(0,0),0>", "<(-10,-4),10>", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("line") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'{4,5,6}'", "'{0,1,0}'", "null") - // .addExpectedValues("{4,5,6}", "{0,1,0}", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("lseg") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") - // .addExpectedValues("[(3,7),(15,18)]", "[(0,0),(0,0)]", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("path") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") - // .addExpectedValues("((3,7),(15,18))", "((0,0),(0,0))", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("point") - // .airbyteType(JsonSchemaPrimitive.NUMBER) - // .addInsertValues("'(3,7)'", "'(0,0)'", "'(999999999999999999999999,0)'", "null") - // .addExpectedValues("(3,7)", "(0,0)", "(1e+24,0)", null) - // .build()); - - // addDataTypeTestData( - // TestDataHolder.builder() - // .sourceType("polygon") - // .airbyteType(JsonSchemaPrimitive.STRING) - // .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", - // "'((0,0),(999999999999999999999999,0))'", "null") - // .addExpectedValues("((3,7),(15,18))", "((0,0),(0,0))", "((0,0),(1e+24,0))", null) - // .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("box") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") + .addExpectedValues("(15.0,18.0),(3.0,7.0)", "(0.0,0.0),(0.0,0.0)", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("circle") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'(5,7),10'", "'(0,0),0'", "'(-10,-4),10'", "null") + .addExpectedValues("<(5.0,7.0),10.0>", "<(0.0,0.0),0.0>", "<(-10.0,-4.0),10.0>", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("line") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'{4,5,6}'", "'{0,1,0}'", "null") + .addExpectedValues("{4.0,5.0,6.0}", "{0.0,1.0,0.0}", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("lseg") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", "null") + .addExpectedValues("[(3.0,7.0),(15.0,18.0)]", "[(0.0,0.0),(0.0,0.0)]", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("path") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15.5,18.2))'", "'((0,0),(0,0))'", "null") + .addExpectedValues("((3.0,7.0),(15.5,18.2))", "((0.0,0.0),(0.0,0.0))", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("point") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .addInsertValues("'(3,7)'", "'(0,0)'", "'(999999999999999999999999,0)'", "null") + .addExpectedValues("(3.0,7.0)", "(0.0,0.0)", "(1.0E24,0.0)", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("polygon") + .airbyteType(JsonSchemaPrimitive.STRING) + .addInsertValues("'((3,7),(15,18))'", "'((0,0),(0,0))'", + "'((0,0),(999999999999999999999999,0))'", "null") + .addExpectedValues("((3.0,7.0),(15.0,18.0))", 
"((0.0,0.0),(0.0,0.0))", "((0.0,0.0),(1.0E24,0.0))", null) + .build()); } } diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index ec56709bf8c19..c0733190dad75 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -235,7 +235,7 @@ Postgres data types are mapped to the following data types when synchronizing da | `lseg` | string | | | `macaddr` | string | | | `macaddr8` | string | | -| `money` | string | | +| `money` | string | When running logical replication (CDC), `money` values larger than 999999999999999 (15 nines) or smaller than -999999999999999 (15 nines) are transmitted as null; | | `mood` | string | | | `numeric` | number | | | `path` | string | | @@ -263,6 +263,7 @@ Postgres data types are mapped to the following data types when synchronizing da | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.3.13 | 2021-10-26 | [7339](https://github.com/airbytehq/airbyte/pull/7339) | Support or improve support for Interval, Money, Date, various geometric data types, inventory_items, and others | | 0.3.12 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | | 0.3.11 | 2021-09-02 | [5742](https://github.com/airbytehq/airbyte/pull/5742) | Add SSH Tunnel support | | 0.3.9 | 2021-08-17 | [5304](https://github.com/airbytehq/airbyte/pull/5304) | Fix CDC OOM issue | From b1f5c23a6ffb11572a0b141f6aa93cbebc6c8a4a Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 2 Nov 2021 09:45:54 -0700 Subject: [PATCH 06/83] use ADD to reduce docker image sizes (#7537) * use ADD to reduce docker image sizes * switch to full paths --- .bumpversion.cfg | 6 ++++++ airbyte-scheduler/app/Dockerfile | 6 ++---- airbyte-server/Dockerfile | 6 ++---- airbyte-workers/Dockerfile | 8 +++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 268c4c2cba692..ebf0c22c63ae2 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -10,6 +10,12 @@ serialize = [bumpversion:file:.env] +[bumpversion:file:airbyte-server/Dockerfile] + +[bumpversion:file:airbyte-workers/Dockerfile] + +[bumpversion:file:airbyte-scheduler/app/Dockerfile] + [bumpversion:file:airbyte-webapp/package.json] [bumpversion:file:airbyte-webapp/package-lock.json] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 8984019c79a17..26eee4e99bc45 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,9 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -COPY build/distributions/${APPLICATION}-0*.tar ${APPLICATION}.tar - -RUN tar xf ${APPLICATION}.tar --strip-components=1 +ADD build/distributions/${APPLICATION}-0.30.23-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.23-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 57d0dbd262cbc..4c56d37844989 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,9 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -COPY build/distributions/${APPLICATION}-0*.tar ${APPLICATION}.tar - -RUN tar xf ${APPLICATION}.tar --strip-components=1 +ADD build/distributions/${APPLICATION}-0.30.23-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", 
"bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.23-alpha/bin/${APPLICATION}"] diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 513bfc194030d..397393b3d885c 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -27,10 +27,8 @@ RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.17.14/ RUN chmod +x ./kubectl RUN mv ./kubectl /usr/local/bin -# Move and run worker -COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar - -RUN tar xf ${APPLICATION}.tar --strip-components=1 +# Move worker app +ADD build/distributions/${APPLICATION}-0.30.23-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.23-alpha/bin/${APPLICATION}"] From c6a2a564aa50d7fadb7f760bf6a2c280205c5fa3 Mon Sep 17 00:00:00 2001 From: Achmad Syarif Hidayatullah <30652154+asyarif93@users.noreply.github.com> Date: Wed, 3 Nov 2021 01:27:33 +0700 Subject: [PATCH 07/83] =?UTF-8?q?=F0=9F=90=9B=20Source=20Zendesk=20Support?= =?UTF-8?q?:=20fix=20initially=5Fassigned=5Fat=20type=20in=20ticket=20metr?= =?UTF-8?q?ics=20(#7377)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(zendesk): change initially_assigned_at format * docs(zendesk): add change log * build(zendesk): update connector definition --- .../79c1aa37-dae3-42ae-b333-d1c105477715.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../connectors/source-zendesk-support/Dockerfile | 2 +- .../source_zendesk_support/schemas/ticket_metrics.json | 2 +- docs/integrations/sources/zendesk-support.md | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json index 6a3aecdaa2153..04bc61ae14ef1 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/79c1aa37-dae3-42ae-b333-d1c105477715.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "79c1aa37-dae3-42ae-b333-d1c105477715", "name": "Zendesk Support", "dockerRepository": "airbyte/source-zendesk-support", - "dockerImageTag": "0.1.3", + "dockerImageTag": "0.1.4", "documentationUrl": "https://docs.airbyte.io/integrations/sources/zendesk-support", "icon": "zendesk.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 9915573cc9337..ae98307cfdfe4 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -586,7 +586,7 @@ - name: Zendesk Support sourceDefinitionId: 79c1aa37-dae3-42ae-b333-d1c105477715 dockerRepository: airbyte/source-zendesk-support - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/zendesk-support icon: zendesk.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-zendesk-support/Dockerfile b/airbyte-integrations/connectors/source-zendesk-support/Dockerfile index f44e3e602d74d..42475d2905f52 100644 --- 
a/airbyte-integrations/connectors/source-zendesk-support/Dockerfile +++ b/airbyte-integrations/connectors/source-zendesk-support/Dockerfile @@ -25,5 +25,5 @@ COPY source_zendesk_support ./source_zendesk_support ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-zendesk-support diff --git a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json index 454ab85dffc8a..a139c863d2b91 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json +++ b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_metrics.json @@ -132,7 +132,7 @@ }, "initially_assigned_at": { "type": ["null", "string"], - "format": "datetime" + "format": "date-time" }, "assigned_at": { "type": ["null", "string"], diff --git a/docs/integrations/sources/zendesk-support.md b/docs/integrations/sources/zendesk-support.md index 598e77acb1321..69bcfba6c75c2 100644 --- a/docs/integrations/sources/zendesk-support.md +++ b/docs/integrations/sources/zendesk-support.md @@ -97,6 +97,7 @@ We recommend creating a restricted, read-only key specifically for Airbyte acces | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| `0.1.4` | 2021-10-26 | [7377](https://github.com/airbytehq/airbyte/pull/7377) | fix initially_assigned_at type in ticket metrics | | `0.1.3` | 2021-10-17 | [7097](https://github.com/airbytehq/airbyte/pull/7097) | correction of spec file | | `0.1.2` | 2021-10-16 | [6513](https://github.com/airbytehq/airbyte/pull/6513) | fixed comments stream | | `0.1.1` | 2021-09-02 | [5787](https://github.com/airbytehq/airbyte/pull/5787) | fixed incremental logic for the ticket_comments stream | From d37e00e6a45d85a58aadd7a972728a528b007204 Mon Sep 17 00:00:00 2001 From: vitaliizazmic <75620293+vitaliizazmic@users.noreply.github.com> Date: Tue, 2 Nov 2021 22:17:00 +0200 Subject: [PATCH 08/83] =?UTF-8?q?=F0=9F=8E=89=20Source=20Google=20Director?= =?UTF-8?q?y:=20migrate=20to=20the=20CDK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Google Directory #7415 - migrate to the CDK * Source Google Directory #5190 - fix timeout error * Source Google Directory #7415 - fix according to PR review * Source Google Directory #7415 - added etag and lastLoginTime to ignored fields for full refresh acceptance test * Source Google Directory #7415 - fix full refresh acceptance test config * Source Google Directory #7415 - bump version --- .../d19ae824-e289-4b14-995a-0632eb46d246.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../source-google-directory/.dockerignore | 2 +- .../source-google-directory/Dockerfile | 42 +++- .../acceptance-test-config.yml | 22 ++ .../acceptance-test-docker.sh | 16 ++ .../source-google-directory/build.gradle | 15 +- .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 14 ++ .../integration_tests/configured_catalog.json | 34 +++ .../integration_tests/invalid_config.json | 4 + .../integration_tests/sample_config.json | 4 + .../{main_dev.py => main.py} | 2 +- .../source-google-directory/requirements.txt | 4 +- .../source-google-directory/setup.py | 32 +-- 
.../source_google_directory/api.py | 15 +- .../source_google_directory/client.py | 2 +- .../schemas/group_members.json | 10 +- .../schemas/groups.json | 16 +- .../schemas/users.json | 220 ++++++++---------- .../source_google_directory/source.py | 2 +- docs/integrations/sources/google-directory.md | 1 + 22 files changed, 275 insertions(+), 189 deletions(-) create mode 100644 airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json rename airbyte-integrations/connectors/source-google-directory/{main_dev.py => main.py} (84%) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json index 7e1649ea63744..c15dd270d14b0 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "d19ae824-e289-4b14-995a-0632eb46d246", "name": "Google Directory", "dockerRepository": "airbyte/source-google-directory", - "dockerImageTag": "0.1.5", + "dockerImageTag": "0.1.6", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-directory" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index ae98307cfdfe4..53d51637789f9 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -192,7 +192,7 @@ - name: Google Directory sourceDefinitionId: d19ae824-e289-4b14-995a-0632eb46d246 dockerRepository: airbyte/source-google-directory - dockerImageTag: 0.1.5 + dockerImageTag: 0.1.6 documentationUrl: https://docs.airbyte.io/integrations/sources/google-directory sourceType: api - name: Google Search Console diff --git a/airbyte-integrations/connectors/source-google-directory/.dockerignore b/airbyte-integrations/connectors/source-google-directory/.dockerignore index 99638fb7b66de..543110c087f09 100644 --- a/airbyte-integrations/connectors/source-google-directory/.dockerignore +++ b/airbyte-integrations/connectors/source-google-directory/.dockerignore @@ -1,6 +1,6 @@ * !Dockerfile -!Dockerfile.test +!main.py !source_google_directory !setup.py !secrets diff --git a/airbyte-integrations/connectors/source-google-directory/Dockerfile b/airbyte-integrations/connectors/source-google-directory/Dockerfile index 65af5d8719f9a..995b550e28eba 100644 --- a/airbyte-integrations/connectors/source-google-directory/Dockerfile +++ b/airbyte-integrations/connectors/source-google-directory/Dockerfile @@ -1,18 +1,38 @@ -FROM 
airbyte/integration-base-python:0.1.1 +FROM python:3.7.11-alpine3.14 as base -# Bash is installed for more convenient debugging. -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base -ENV CODE_PATH="source_google_directory" -ENV AIRBYTE_IMPL_MODULE="source_google_directory" -ENV AIRBYTE_IMPL_PATH="SourceGoogleDirectory" -WORKDIR /airbyte/integration_code -COPY $CODE_PATH ./$CODE_PATH COPY setup.py ./ -RUN pip install ".[main]" +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_google_directory ./source_google_directory -ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh" +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.5 +LABEL io.airbyte.version=0.1.6 LABEL io.airbyte.name=airbyte/source-google-directory diff --git a/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml new file mode 100644 index 0000000000000..fb8a23bcf7aeb --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml @@ -0,0 +1,22 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-google-directory:dev +tests: + spec: + - spec_path: "source_google_directory/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + # API returns different lastLoginTime for some users, eteg is generated on all data and also same time different + ignored_fields: + "users": ["etag", "lastLoginTime"] diff --git a/airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-google-directory/build.gradle b/airbyte-integrations/connectors/source-google-directory/build.gradle index d5273dc9cd9b1..cc749b14afc54 100644 --- a/airbyte-integrations/connectors/source-google-directory/build.gradle +++ b/airbyte-integrations/connectors/source-google-directory/build.gradle @@ -1,22 +1,9 @@ plugins { id 'airbyte-python' id 'airbyte-docker' - id 'airbyte-standard-source-test-file' + id 'airbyte-source-acceptance-test' } airbytePython { moduleDirectory 'source_google_directory' } - -airbyteStandardSourceTestFile { - // For more information on standard source tests, see https://docs.airbyte.io/connector-development/testing-connectors - specPath = "source_google_directory/spec.json" - configPath = "secrets/config.json" - configuredCatalogPath = "sample_files/configured_catalog.json" -} - - -dependencies { - implementation files(project(':airbyte-integrations:bases:base-standard-source-test-file').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) -} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py b/airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py new file mode 100644 index 0000000000000..108075487440f --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..d8a60dea13653 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/configured_catalog.json @@ -0,0 +1,34 @@ +{ + "streams": [ + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "groups", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "group_members", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..2135fa0a5cb24 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "credentials_json": "{}", + "email": "test@test.test" +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json new file mode 100644 index 0000000000000..361b1de29e923 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "credentials_json": "", + "email": "test@test.test" +} diff --git a/airbyte-integrations/connectors/source-google-directory/main_dev.py b/airbyte-integrations/connectors/source-google-directory/main.py similarity index 84% rename from airbyte-integrations/connectors/source-google-directory/main_dev.py rename to airbyte-integrations/connectors/source-google-directory/main.py index bd3852b046f60..b8f3ffa6715cb 100644 --- a/airbyte-integrations/connectors/source-google-directory/main_dev.py +++ b/airbyte-integrations/connectors/source-google-directory/main.py @@ -5,7 +5,7 @@ import sys -from base_python.entrypoint import launch +from airbyte_cdk.entrypoint import launch from source_google_directory import SourceGoogleDirectory if __name__ == "__main__": diff --git a/airbyte-integrations/connectors/source-google-directory/requirements.txt b/airbyte-integrations/connectors/source-google-directory/requirements.txt index 76af767f3755a..0411042aa0911 100644 --- a/airbyte-integrations/connectors/source-google-directory/requirements.txt +++ b/airbyte-integrations/connectors/source-google-directory/requirements.txt @@ -1,4 +1,2 @@ --e ../../bases/airbyte-protocol --e ../../bases/base-python --e ../../bases/base-python-test +-e ../../bases/source-acceptance-test -e . 
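For readers not familiar with the Airbyte CDK migration being performed in this commit, the main.py hunk above only shows the changed import; the full entrypoint module it produces looks roughly like the sketch below. The two statements inside the __main__ guard follow the standard airbyte-cdk launch pattern and are assumed here rather than taken from this diff.

import sys

from airbyte_cdk.entrypoint import launch
from source_google_directory import SourceGoogleDirectory

if __name__ == "__main__":
    # Hand control to the CDK entrypoint, which dispatches the
    # spec/check/discover/read commands that Airbyte passes on the CLI.
    source = SourceGoogleDirectory()
    launch(source, sys.argv[1:])

This is also the module that the new Dockerfile ENTRYPOINT ("python /airbyte/integration_code/main.py") invokes directly, replacing the old /airbyte/base.sh wrapper.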
diff --git a/airbyte-integrations/connectors/source-google-directory/setup.py b/airbyte-integrations/connectors/source-google-directory/setup.py index b2b989d84ed13..31545b105515f 100644 --- a/airbyte-integrations/connectors/source-google-directory/setup.py +++ b/airbyte-integrations/connectors/source-google-directory/setup.py @@ -5,27 +5,29 @@ from setuptools import find_packages, setup +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", + "google-api-python-client==1.12.8", + "google-auth-httplib2==0.0.4", + "google-auth-oauthlib==0.4.2", + "backoff==1.10.0", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + setup( name="source_google_directory", description="Source implementation for Google Directory.", author="Airbyte", author_email="contact@airbyte.io", packages=find_packages(), - install_requires=[ - "airbyte-protocol", - "base-python", - "google-api-python-client==1.12.8", - "google-auth-httplib2==0.0.4", - "google-auth-oauthlib==0.4.2", - "backoff==1.10.0", - ], - package_data={"": ["*.json", "schemas/*.json"]}, - setup_requires=["pytest-runner"], - tests_require=["pytest"], + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, extras_require={ - # Dependencies required by the main package but not integration tests should go in main. Deps required by - # integration tests but not the main package go in tests. Deps required by both should go in - # install_requires. - "tests": ["airbyte-python-test", "pytest"], + "tests": TEST_REQUIREMENTS, }, ) diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py index 4eaee6953a818..8083eb3080445 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py @@ -10,7 +10,7 @@ import backoff from google.oauth2 import service_account -from googleapiclient.discovery import Resource, build +from googleapiclient.discovery import build from googleapiclient.errors import HttpError as GoogleApiHttpError from .utils import rate_limit_handling @@ -23,25 +23,26 @@ def __init__(self, credentials_json: str, email: str): self._creds = None self._credentials_json = credentials_json self._admin_email = email + self._service = None def _load_account_info(self) -> Dict: account_info = json.loads(self._credentials_json) return account_info - def _obtain_creds(self) -> service_account.Credentials: + def _obtain_creds(self): account_info = self._load_account_info() creds = service_account.Credentials.from_service_account_info(account_info, scopes=SCOPES) self._creds = creds.with_subject(self._admin_email) - def _construct_resource(self) -> Resource: + def _construct_resource(self): if not self._creds: self._obtain_creds() - service = build("admin", "directory_v1", credentials=self._creds) - return service + if not self._service: + self._service = build("admin", "directory_v1", credentials=self._creds) def _get_resource(self, name: str): - service = self._construct_resource() - return getattr(service, name) + self._construct_resource() + return getattr(self._service, name) @backoff.on_exception(backoff.expo, GoogleApiHttpError, max_tries=7, giveup=rate_limit_handling) def get(self, name: str, params: Dict = None) -> Dict: diff --git 
a/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py index 2e5faa7cb9317..32b48f2007053 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py @@ -5,7 +5,7 @@ from typing import Any, Mapping, Tuple -from base_python import BaseClient +from airbyte_cdk.sources.deprecated.client import BaseClient from .api import API, GroupMembersAPI, GroupsAPI, UsersAPI diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json index bd52c904a89ea..1359c3cea555b 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/group_members.json @@ -3,19 +3,19 @@ "type": "object", "properties": { "kind": { - "type": "string" + "type": ["null", "string"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "email": { - "type": "string" + "type": ["null", "string"] }, "role": { - "type": "string" + "type": ["null", "string"] }, "type": { - "type": "string" + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json index cea499bdf2a28..09ef95af9dbed 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/groups.json @@ -3,28 +3,28 @@ "type": "object", "properties": { "kind": { - "type": "string" + "type": ["null", "string"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "etag": { - "type": "string" + "type": ["null", "string"] }, "email": { - "type": "string" + "type": ["null", "string"] }, "name": { - "type": "string" + "type": ["null", "string"] }, "directMembersCount": { - "type": "string" + "type": ["null", "string"] }, "description": { - "type": "string" + "type": ["null", "string"] }, "adminCreated": { - "type": "boolean" + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json index a90e989c66032..3350edc42db65 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/schemas/users.json @@ -3,189 +3,169 @@ "type": "object", "properties": { "kind": { - "type": "string" + "type": ["null", "string"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "username": { - "type": "string" + "type": ["null", "string"] }, "name": { - "type": "object", + "type": ["null", "object"], "properties": { "givenName": { - "type": "string" + "type": ["null", "string"] }, "familyName": { - "type": "string" + "type": ["null", "string"] }, "fullName": { - "type": "string" + "type": ["null", "string"] } - }, - "required": ["givenName", "familyName", 
"fullName"] + } }, "isAdmin": { - "type": "boolean" + "type": ["null", "boolean"] }, "isDelegatedAdmin": { - "type": "boolean" + "type": ["null", "boolean"] }, "lastLoginTime": { - "type": "string" + "type": ["null", "string"] }, "creationTime": { - "type": "string" + "type": ["null", "string"] }, "agreedToTerms": { - "type": "boolean" + "type": ["null", "boolean"] }, "hashFunction": { - "type": "string" + "type": ["null", "string"] }, "suspended": { - "type": "boolean" + "type": ["null", "boolean"] }, "changePasswordAtNextLogin": { - "type": "boolean" + "type": ["null", "boolean"] }, "ipWhitelisted": { - "type": "boolean" + "type": ["null", "boolean"] }, "emails": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "address": { - "type": "string" - }, - "type": { - "type": "string" - }, - "customType": { - "type": "string" - }, - "primary": { - "type": "boolean" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "address": { + "type": ["null", "string"] }, - "required": ["address", "type", "customType", "primary"] + "type": { + "type": ["null", "string"] + }, + "customType": { + "type": ["null", "string"] + }, + "primary": { + "type": ["null", "boolean"] + } } - ] + } }, "externalIds": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "type": { - "type": "string" - }, - "customType": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] }, - "required": ["value", "type", "customType"] + "customType": { + "type": ["null", "string"] + } } - ] + } }, "relations": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "type": { - "type": "string" - }, - "customType": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] }, - "required": ["value", "type", "customType"] + "type": { + "type": ["null", "string"] + }, + "customType": { + "type": ["null", "string"] + } } - ] + } }, "organizations": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "title": { - "type": "string" - }, - "primary": { - "type": "boolean" - }, - "customType": { - "type": "string" - }, - "description": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "name": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] }, - "required": ["name", "title", "primary", "customType", "description"] + "primary": { + "type": ["null", "boolean"] + }, + "customType": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + } } - ] + } }, "phones": { - "type": "array", - "items": [ - { - "type": "object", - "properties": { - "value": { - "type": "string" - }, - "type": { - "type": "string" - } + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] }, - "required": ["value", "type"] + "type": { + "type": ["null", "string"] + } } - ] + } }, "aliases": { - "type": "array", - "items": [ - { - "type": "string" - } - ] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "nonEditableAliases": { - "type": "array", - "items": [ - { - "type": "string" - } - ] + "type": ["null", 
"array"], + "items": { + "type": ["null", "string"] + } }, "customerId": { - "type": "string" + "type": ["null", "string"] }, "orgUnitPath": { - "type": "string" + "type": ["null", "string"] }, "isMailboxSetup": { - "type": "boolean" + "type": ["null", "boolean"] }, "includeInGlobalAddressList": { - "type": "boolean" + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py index 13fbb18ed93d2..98762996cd345 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/source.py @@ -3,7 +3,7 @@ # -from base_python import BaseSource +from airbyte_cdk.sources.deprecated.base_source import BaseSource from .client import Client diff --git a/docs/integrations/sources/google-directory.md b/docs/integrations/sources/google-directory.md index b82e6f06f2f34..bb52676d054df 100644 --- a/docs/integrations/sources/google-directory.md +++ b/docs/integrations/sources/google-directory.md @@ -58,5 +58,6 @@ You should now be ready to use the Google Directory connector in Airbyte. | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| 0.1.6 | 2021-11-02 | [7464](https://github.com/airbytehq/airbyte/pull/7464) | Migrate to the CDK | | 0.1.5 | 2021-10-20 | [6930](https://github.com/airbytehq/airbyte/pull/6930) | Fix crash when a group don't have members | | 0.1.4 | 2021-10-19 | [7167](https://github.com/airbytehq/airbyte/pull/7167) | Add organizations and phones to `users` schema | From 50112264c525e4a3ae0b2d3439ee38b25f43c421 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Tue, 2 Nov 2021 14:15:11 -0700 Subject: [PATCH 09/83] Setup the right version for the docker container (#7570) --- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-workers/Dockerfile | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 26eee4e99bc45..5144d2136f9ca 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.23-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.23-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.25-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 4c56d37844989..3ea4279ef3efb 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.23-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.23-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.25-alpha/bin/${APPLICATION}"] diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 397393b3d885c..69f7736edc243 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -28,7 +28,7 @@ RUN chmod +x ./kubectl RUN mv 
./kubectl /usr/local/bin

 # Move worker app
-ADD build/distributions/${APPLICATION}-0.30.23-alpha.tar /app
+ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app

 # wait for upstream dependencies to become available before starting server
-ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.23-alpha/bin/${APPLICATION}"]
+ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.25-alpha/bin/${APPLICATION}"]

From 6586fbaf3973e2641d90eb6f814577a46b88e3b7 Mon Sep 17 00:00:00 2001
From: midavadim
Date: Tue, 2 Nov 2021 23:24:10 +0200
Subject: [PATCH 10/83] :bug: source mixpanel: match API limitation of requests rate (#7439)

* Added delay for all streams; removed logic which increased the request rate, because it did not take into consideration the actual number of requests made in previous and next streams.
* Fixed argument passing
* Increased timeout for SAT
* Increased timeout for SAT
* bump version
* bumped connector version, updated change log

Co-authored-by: Marcos Marx
---
 .../12928b32-bf0a-4f1e-964f-07e12e37153a.json |  2 +-
 .../resources/seed/source_definitions.yaml    |  2 +-
 .../connectors/source-mixpanel/Dockerfile     |  2 +-
 .../acceptance-test-config.yml                |  3 ++
 .../source-mixpanel/source_mixpanel/source.py | 36 +++++++------------
 docs/integrations/sources/mixpanel.md         |  1 +
 6 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json
index cd73c479261a5..addc1ddf76170 100644
--- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json
+++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json
@@ -2,7 +2,7 @@
   "sourceDefinitionId": "12928b32-bf0a-4f1e-964f-07e12e37153a",
   "name": "Mixpanel",
   "dockerRepository": "airbyte/source-mixpanel",
-  "dockerImageTag": "0.1.1",
+  "dockerImageTag": "0.1.2",
   "documentationUrl": "https://docs.airbyte.io/integrations/sources/mixpanel",
   "icon": "mixpanel.svg"
 }
diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
index 53d51637789f9..11082ad6991b3 100644
--- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
@@ -328,7 +328,7 @@
 - name: Mixpanel
   sourceDefinitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a
   dockerRepository: airbyte/source-mixpanel
-  dockerImageTag: 0.1.1
+  dockerImageTag: 0.1.2
   documentationUrl: https://docs.airbyte.io/integrations/sources/mixpanel
   icon: mixpanel.svg
   sourceType: api
diff --git a/airbyte-integrations/connectors/source-mixpanel/Dockerfile b/airbyte-integrations/connectors/source-mixpanel/Dockerfile
index d77882fdd02dd..6985afaf20690 100644
--- a/airbyte-integrations/connectors/source-mixpanel/Dockerfile
+++ b/airbyte-integrations/connectors/source-mixpanel/Dockerfile
@@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/source-mixpanel diff --git a/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml b/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml index 974e60c5e2591..a198296243479 100644 --- a/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml @@ -14,9 +14,11 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 3600 full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 3600 incremental: # incremental streams Funnels, Revenue, Export # Funnels - fails because it has complex state, like {'funnel_idX': {'date': 'dateX'}} @@ -29,4 +31,5 @@ tests: cursor_paths: revenue: ["date"] export: ["date"] + timeout_seconds: 3600 diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py index fc1ce3a78fa58..894301292adc7 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py @@ -27,13 +27,7 @@ class MixpanelStream(HttpStream, ABC): A maximum of 5 concurrent queries 400 queries per hour. - API Rate Limit Handler: - If total number of planned requests is lower than it is allowed per hour - then - reset reqs_per_hour_limit and send requests with small delay (1 reqs/sec) - because API endpoint accept requests bursts up to 3 reqs/sec - else - send requests with planned delay: 3600/reqs_per_hour_limit seconds + API Rate Limit Handler: after each request freeze for the time period: 3600/reqs_per_hour_limit seconds """ @property @@ -82,7 +76,7 @@ def _send_request(self, request: requests.PreparedRequest, request_kwargs: Mappi self.logger.error(f"Stream {self.name}: {e.response.status_code} {e.response.reason} - {error_message}") raise e - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: json_response = response.json() if self.data_field is not None: data = json_response.get(self.data_field, []) @@ -94,6 +88,11 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp for record in data: yield record + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + + # parse the whole response + yield from self.process_response(response, **kwargs) + # wait for X seconds to match API limitations time.sleep(3600 / self.reqs_per_hour_limit) @@ -190,10 +189,6 @@ def stream_slices( # add 1 additional day because date range is inclusive start_date = end_date + timedelta(days=1) - # reset reqs_per_hour_limit if we expect less requests (1 req per stream) than it is allowed by API reqs_per_hour_limit - if len(date_slices) < self.reqs_per_hour_limit: - self.reqs_per_hour_limit = 3600 # 1 query per sec - return date_slices def request_params( @@ -269,9 +264,6 @@ def stream_slices( for date_slice in date_slices: stream_slices.append({**funnel_slice, **date_slice}) - # reset reqs_per_hour_limit if we expect less 
requests (1 req per stream) than it is allowed by API reqs_per_hour_limit - if len(stream_slices) < self.reqs_per_hour_limit: - self.reqs_per_hour_limit = 3600 # queries per hour (1 query per sec) return stream_slices def request_params( @@ -288,7 +280,7 @@ def request_params( params["unit"] = "day" return params - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ response.json() example: { @@ -368,7 +360,7 @@ class EngageSchema(MixpanelStream): def path(self, **kwargs) -> str: return "engage/properties" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ response.json() example: { @@ -444,7 +436,7 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, self._total = None return None - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ { "page": 0 @@ -591,7 +583,7 @@ class Revenue(DateSlicesMixin, IncrementalMixpanelStream): def path(self, **kwargs) -> str: return "engage/revenue" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """ response.json() example: { @@ -634,7 +626,7 @@ class ExportSchema(MixpanelStream): def path(self, **kwargs) -> str: return "events/properties/top" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[str]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[str]: """ response.json() example: { @@ -691,7 +683,7 @@ def url_base(self): def path(self, **kwargs) -> str: return "export" - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + def process_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: """Export API return response.text in JSONL format but each line is a valid JSON object Raw item example: { @@ -737,8 +729,6 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp yield item - # wait for X seconds to meet API limitation - time.sleep(3600 / self.reqs_per_hour_limit) def get_json_schema(self) -> Mapping[str, Any]: """ diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index 4c3e3dc1ce984..50713de707e0c 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -56,6 +56,7 @@ Select the correct region \(EU or US\) for your Mixpanel project. 
See detail [he | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.2` | 2021-11-02 | [7439](https://github.com/airbytehq/airbyte/issues/7439) | Added delay for all streams to match API limitation of requests rate | | `0.1.1` | 2021-09-16 | [6075](https://github.com/airbytehq/airbyte/issues/6075) | Added option to select project region | | `0.1.0` | 2021-07-06 | [3698](https://github.com/airbytehq/airbyte/issues/3698) | created CDK native mixpanel connector | From 24a38f70d1c2d7be04fb6e6b6619f1cf8938bebb Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 2 Nov 2021 15:10:30 -0700 Subject: [PATCH 11/83] fix ssl flag used for postgres source in acceptance tests (#7572) * fix ssl flag used for postgres source in acceptance tests * reverse for gke --- .../java/io/airbyte/test/acceptance/AcceptanceTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index 24c73947f793e..8f8584934a956 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -1102,6 +1102,7 @@ private Map localConfig(final PostgreSQLContainer psql, final bo dbConfig.put("port", psql.getFirstMappedPort()); dbConfig.put("database", psql.getDatabaseName()); dbConfig.put("username", psql.getUsername()); + dbConfig.put("ssl", false); if (withSchema) { dbConfig.put("schema", "public"); From c8454935287aea4a4ae21de63e070aa57e4a7bf4 Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Wed, 3 Nov 2021 08:15:00 +0800 Subject: [PATCH 12/83] Configure Socat to only print fatal, error and warning message. 
(#7561) --- .../java/io/airbyte/workers/process/KubePodProcess.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index d24e781f2a490..ca2c7aba0fe53 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -325,21 +325,21 @@ public KubePodProcess(final String processRunnerHost, final Container remoteStdin = new ContainerBuilder() .withName("remote-stdin") .withImage("alpine/socat:1.7.4.1-r1") - .withCommand("sh", "-c", "socat -d -d -d TCP-L:9001 STDOUT > " + STDIN_PIPE_FILE) + .withCommand("sh", "-c", "socat -d TCP-L:9001 STDOUT > " + STDIN_PIPE_FILE) .withVolumeMounts(pipeVolumeMount, terminationVolumeMount) .build(); final Container relayStdout = new ContainerBuilder() .withName("relay-stdout") .withImage("alpine/socat:1.7.4.1-r1") - .withCommand("sh", "-c", String.format("cat %s | socat -d -d -d - TCP:%s:%s", STDOUT_PIPE_FILE, processRunnerHost, stdoutLocalPort)) + .withCommand("sh", "-c", String.format("cat %s | socat -d - TCP:%s:%s", STDOUT_PIPE_FILE, processRunnerHost, stdoutLocalPort)) .withVolumeMounts(pipeVolumeMount, terminationVolumeMount) .build(); final Container relayStderr = new ContainerBuilder() .withName("relay-stderr") .withImage("alpine/socat:1.7.4.1-r1") - .withCommand("sh", "-c", String.format("cat %s | socat -d -d -d - TCP:%s:%s", STDERR_PIPE_FILE, processRunnerHost, stderrLocalPort)) + .withCommand("sh", "-c", String.format("cat %s | socat -d - TCP:%s:%s", STDERR_PIPE_FILE, processRunnerHost, stderrLocalPort)) .withVolumeMounts(pipeVolumeMount, terminationVolumeMount) .build(); From bf3ded3d1ca9562ceb99559c3d89a0f201a392a5 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 2 Nov 2021 17:31:30 -0700 Subject: [PATCH 13/83] remove kubectl from worker (#7582) --- airbyte-workers/Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 69f7736edc243..77828fb4fc94d 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -22,11 +22,6 @@ ENV APPLICATION airbyte-workers WORKDIR /app -# Install kubectl -RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.17.14/bin/linux/${ARCH}/kubectl -RUN chmod +x ./kubectl -RUN mv ./kubectl /usr/local/bin - # Move worker app ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app From 6586fbaf3973e2641d90eb6f814577a46b88e3b7 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 2 Nov 2021 17:31:40 -0700 Subject: [PATCH 14/83] switch kube testing order (#7579) --- .github/workflows/gradle.yml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index f485acc781ace..87ff147195de7 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -459,6 +459,17 @@ jobs: - name: Build Platform Docker Images run: SUB_BUILD=PLATFORM ./gradlew composeBuild --scan + - name: Run Kubernetes End-to-End Acceptance Tests + env: + USER: root + HOME: /home/runner + AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }} + SECRET_STORE_GCP_CREDENTIALS: ${{ secrets.SECRET_STORE_GCP_CREDENTIALS }} + SECRET_STORE_GCP_PROJECT_ID: ${{ secrets.SECRET_STORE_GCP_PROJECT_ID }} + SECRET_STORE_FOR_CONFIGS: ${{ 
secrets.SECRET_STORE_FOR_CONFIGS }} + run: | + CI=true IS_MINIKUBE=true ./tools/bin/acceptance_test_kube.sh + - name: Run Logging Tests run: ./tools/bin/cloud_storage_logging_test.sh env: @@ -481,16 +492,6 @@ jobs: run: | CI=true ./tools/bin/gcp_acceptance_tests.sh - - name: Run Kubernetes End-to-End Acceptance Tests - env: - USER: root - HOME: /home/runner - AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }} - SECRET_STORE_GCP_CREDENTIALS: ${{ secrets.SECRET_STORE_GCP_CREDENTIALS }} - SECRET_STORE_GCP_PROJECT_ID: ${{ secrets.SECRET_STORE_GCP_PROJECT_ID }} - SECRET_STORE_FOR_CONFIGS: ${{ secrets.SECRET_STORE_FOR_CONFIGS }} - run: | - CI=true IS_MINIKUBE=true ./tools/bin/acceptance_test_kube.sh # In case of self-hosted EC2 errors, remove this block. stop-kube-acceptance-test-runner: name: Stop Kube Acceptance Test EC2 Runner From b39274df779aa94df7dcb4f0b6a84fdf3856174b Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Tue, 2 Nov 2021 17:31:50 -0700 Subject: [PATCH 15/83] upgrade kbue client (#7578) --- airbyte-scheduler/app/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-scheduler/app/build.gradle b/airbyte-scheduler/app/build.gradle index ddd6a0cd06996..d40206efb3ae9 100644 --- a/airbyte-scheduler/app/build.gradle +++ b/airbyte-scheduler/app/build.gradle @@ -3,7 +3,7 @@ plugins { } dependencies { - implementation 'io.fabric8:kubernetes-client:5.5.0' + implementation 'io.fabric8:kubernetes-client:5.9.0' implementation 'io.kubernetes:client-java-api:10.0.0' implementation 'io.kubernetes:client-java:10.0.0' implementation 'io.kubernetes:client-java-extended:10.0.0' From 92fcdea5bb11b0494604a2f9f25f33be7eae998f Mon Sep 17 00:00:00 2001 From: Haoran Yu Date: Wed, 3 Nov 2021 11:20:33 +0800 Subject: [PATCH 16/83] Refactor JDBC JSON_Format into JDBC utils (#7504) --- .../lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java | 8 ++++++++ .../databricks/DatabricksDestinationAcceptanceTest.java | 6 ++---- .../destination/jdbc/JdbcDestinationAcceptanceTest.java | 7 ++----- .../MssqlStrictEncryptDestinationAcceptanceTest.java | 6 ++---- .../destination/mssql/MSSQLDestinationAcceptanceTest.java | 7 ++----- .../mssql/MSSQLDestinationAcceptanceTestSSL.java | 7 ++----- .../mssql/SshMSSQLDestinationAcceptanceTest.java | 7 ++----- .../MySQLStrictEncryptDestinationAcceptanceTest.java | 7 ++----- .../destination/mysql/MySQLDestinationAcceptanceTest.java | 7 ++----- .../mysql/SshMySQLDestinationAcceptanceTest.java | 6 ++---- .../mysql/SslMySQLDestinationAcceptanceTest.java | 7 ++----- .../oracle/SshOracleDestinationAcceptanceTest.java | 6 ++---- .../UnencryptedOracleDestinationAcceptanceTest.java | 7 ++----- .../PostgresDestinationStrictEncryptAcceptanceTest.java | 7 ++----- .../postgres/PostgresDestinationAcceptanceTest.java | 7 ++----- .../postgres/SshPostgresDestinationAcceptanceTest.java | 7 ++----- .../redshift/RedshiftCopyDestinationAcceptanceTest.java | 6 ++---- .../scheduler/persistence/DefaultJobPersistence.java | 6 ++---- 18 files changed, 42 insertions(+), 79 deletions(-) diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java index 6f8f46862fd79..a7d681857aae6 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java @@ -4,14 +4,22 @@ package io.airbyte.db.jdbc; +import org.jooq.JSONFormat; + public class JdbcUtils { private static final JdbcSourceOperations 
defaultSourceOperations = new JdbcSourceOperations(); + private static final JSONFormat defaultJSONFormat = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); + public static JdbcSourceOperations getDefaultSourceOperations() { return defaultSourceOperations; } + public static JSONFormat getDefaultJSONFormat() { + return defaultJSONFormat; + } + public static String getFullyQualifiedTableName(final String schemaName, final String tableName) { return schemaName != null ? schemaName + "." + tableName : tableName; } diff --git a/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java index 4fc11cf3d7d07..1bdda60327006 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-databricks/src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java @@ -18,6 +18,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.copy.StreamCopierFactory; @@ -31,8 +32,6 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +40,6 @@ public class DatabricksDestinationAcceptanceTest extends DestinationAcceptanceTe private static final Logger LOGGER = LoggerFactory.getLogger(DatabricksDestinationAcceptanceTest.class); private static final String SECRETS_CONFIG_JSON = "secrets/config.json"; - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); private final ExtendedNameTransformer nameTransformer = new DatabricksNameTransformer(); private JsonNode configJson; @@ -85,7 +83,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, .orderBy(field(JavaBaseConstants.COLUMN_NAME_EMITTED_AT).asc()) .fetch().stream() .map(record -> { - final JsonNode json = Jsons.deserialize(record.formatJSON(JSON_FORMAT)); + final JsonNode json = Jsons.deserialize(record.formatJSON(JdbcUtils.getDefaultJSONFormat())); final JsonNode jsonWithOriginalFields = nameUpdater.getJsonWithOriginalFieldNames(json); return AvroRecordHelper.pruneAirbyteJson(jsonWithOriginalFields); }) diff --git a/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java index d8a4681fa43ce..a49b1664be4f6 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/destination-jdbc/src/test-integration/java/io/airbyte/integrations/destination/jdbc/JdbcDestinationAcceptanceTest.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -15,14 +16,10 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; public class JdbcDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private PostgreSQLContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @@ -99,7 +96,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java index 1cc76ff2ae683..ace31323c4a6d 100644 --- a/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mssql_strict_encrypt/MssqlStrictEncryptDestinationAcceptanceTest.java @@ -14,6 +14,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshHelpers; import io.airbyte.integrations.destination.ExtendedNameTransformer; @@ -23,7 +24,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -31,8 +31,6 @@ public class MssqlStrictEncryptDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); - private static MSSQLServerContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private JsonNode config; @@ -130,7 +128,7 @@ private List retrieveRecordsFromTable(final String tableName, final St return ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + 
.map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); }); diff --git a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java index 8b3222e3fb0fd..2376a434c22a4 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -18,16 +19,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.testcontainers.containers.MSSQLServerContainer; public class MSSQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private static MSSQLServerContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private JsonNode configWithoutDbName; @@ -120,7 +117,7 @@ private List retrieveRecordsFromTable(final String tableName, final St return ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); }); diff --git a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java index bb6af0a0b4554..8717a8c7f6c7f 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java +++ b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/MSSQLDestinationAcceptanceTestSSL.java @@ -11,6 +11,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -18,8 +19,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; 
import org.testcontainers.containers.MSSQLServerContainer; @@ -27,8 +26,6 @@ public class MSSQLDestinationAcceptanceTestSSL extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private static MSSQLServerContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private JsonNode configWithoutDbName; @@ -129,7 +126,7 @@ private List retrieveRecordsFromTable(final String tableName, final St return ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); }); diff --git a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java index 805105394fa58..2f1e4ab42afda 100644 --- a/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mssql/src/test-integration/java/io/airbyte/integrations/destination/mssql/SshMSSQLDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshBastionContainer; import io.airbyte.integrations.base.ssh.SshTunnel; @@ -21,8 +22,6 @@ import java.util.Objects; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.JdbcDatabaseContainer; import org.testcontainers.containers.MSSQLServerContainer; import org.testcontainers.containers.Network; @@ -33,8 +32,6 @@ */ public abstract class SshMSSQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private final String schemaName = RandomStringUtils.randomAlphabetic(8).toLowerCase(); @@ -148,7 +145,7 @@ private List retrieveRecordsFromTable(final String tableName, final St database, schema, tableName.toLowerCase(), JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()))); } diff --git a/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java index a4796b527bb3d..0c29d20a296b8 100644 --- a/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java 
+++ b/airbyte-integrations/connectors/destination-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLStrictEncryptDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -26,16 +27,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MySQLContainer; public class MySQLStrictEncryptDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private MySQLContainer db; private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); @@ -115,7 +112,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java index 6b4ed5de48f74..a9f491059c558 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/MySQLDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -26,16 +27,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MySQLContainer; public class MySQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private MySQLContainer db; private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); @@ -117,7 +114,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) 
.collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java index c668151ee2c12..409737eaf9deb 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java @@ -13,6 +13,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshTunnel; import io.airbyte.integrations.destination.ExtendedNameTransformer; @@ -22,7 +23,6 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; /** * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file @@ -30,8 +30,6 @@ */ public abstract class SshMySQLDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); private String schemaName; @@ -131,7 +129,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName.toLowerCase(), JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()))); } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java index fcadc2909ddf9..b60ac9b2950b5 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SslMySQLDestinationAcceptanceTest.java @@ -8,21 +8,18 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import java.sql.SQLException; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.SQLDialect; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MySQLContainer; public class SslMySQLDestinationAcceptanceTest extends MySQLDestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private MySQLContainer db; private final 
ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); @@ -108,7 +105,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java index 233734419aafc..1646e2f1dc0e8 100644 --- a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/SshOracleDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshBastionContainer; import io.airbyte.integrations.base.ssh.SshTunnel; @@ -21,13 +22,10 @@ import java.util.List; import java.util.Objects; import java.util.stream.Collectors; -import org.jooq.JSONFormat; import org.testcontainers.containers.Network; public abstract class SshOracleDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(JSONFormat.RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new OracleNameTransformer(); private final String schemaName = "TEST_ORCL"; @@ -116,7 +114,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, OracleDestination.COLUMN_NAME_EMITTED_AT))) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java index dd3612ea72909..963e1277a2eec 100644 --- a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java @@ -13,6 +13,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -20,14 +21,10 @@ import java.util.ArrayList; import 
java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.Test; public class UnencryptedOracleDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new OracleNameTransformer(); private static OracleContainer db; private static JsonNode config; @@ -120,7 +117,7 @@ private List retrieveRecordsFromTable(final String tableName, final St .collect(Collectors.toList())); return result .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()); } diff --git a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java index 9e1be4d78e0bf..ce71fbf7ec97c 100644 --- a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationStrictEncryptAcceptanceTest.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -15,16 +16,12 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.utility.DockerImageName; // todo (cgardens) - DRY this up with PostgresDestinationAcceptanceTest public class PostgresDestinationStrictEncryptAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private PostgreSQLContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @@ -117,7 +114,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java index aabac7d93f6ac..17463b521acc2 100644 --- 
a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/PostgresDestinationAcceptanceTest.java @@ -8,6 +8,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -15,14 +16,10 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; public class PostgresDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private PostgreSQLContainer db; private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @@ -116,7 +113,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java index 762fb9aef760e..4ee8d0ed0f79c 100644 --- a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/SshPostgresDestinationAcceptanceTest.java @@ -10,6 +10,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.base.ssh.SshBastionContainer; import io.airbyte.integrations.base.ssh.SshTunnel; @@ -19,8 +20,6 @@ import java.util.List; import java.util.stream.Collectors; import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.testcontainers.containers.PostgreSQLContainer; // todo (cgardens) - likely some of this could be further de-duplicated with @@ -32,8 +31,6 @@ */ public abstract class SshPostgresDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); - private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private static final String schemaName = RandomStringUtils.randomAlphabetic(8).toLowerCase(); private static PostgreSQLContainer db; @@ -130,7 +127,7 @@ private List retrieveRecordsFromTable(final String 
tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList()))); } diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java index 44d232a526b0f..a6fdd5f877000 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftCopyDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.nio.file.Path; @@ -18,8 +19,6 @@ import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; /** * Integration test testing {@link RedshiftCopyS3Destination}. The default Redshift integration test @@ -27,7 +26,6 @@ */ public class RedshiftCopyDestinationAcceptanceTest extends DestinationAcceptanceTest { - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); // config from which to create / delete schemas. private JsonNode baseConfig; // config which refers to the schema that the test is being run in. 
@@ -111,7 +109,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ctx -> ctx .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .stream() - .map(r -> r.formatJSON(JSON_FORMAT)) + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) .map(Jsons::deserialize) .collect(Collectors.toList())); } diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java index 8feceddbda9a4..7b6b96f27efe3 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java @@ -23,6 +23,7 @@ import io.airbyte.db.Database; import io.airbyte.db.ExceptionWrappingDatabase; import io.airbyte.db.instance.jobs.JobsDatabaseSchema; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; @@ -53,8 +54,6 @@ import org.jooq.Field; import org.jooq.InsertValuesStepN; import org.jooq.JSONB; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.jooq.Named; import org.jooq.Record; import org.jooq.Result; @@ -76,7 +75,6 @@ public class DefaultJobPersistence implements JobPersistence { .of("pg_toast", "information_schema", "pg_catalog", "import_backup", "pg_internal", "catalog_history"); - private static final JSONFormat DB_JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); protected static final String DEFAULT_SCHEMA = "public"; private static final String BACKUP_SCHEMA = "import_backup"; public static final String DEPLOYMENT_ID_KEY = "deployment_id"; @@ -618,7 +616,7 @@ private Stream exportTable(final String schema, final String tableName .filter(f -> f.getDataType().getTypeName().equals("jsonb")) .map(Field::getName) .collect(Collectors.toSet()); - final JsonNode row = Jsons.deserialize(record.formatJSON(DB_JSON_FORMAT)); + final JsonNode row = Jsons.deserialize(record.formatJSON(JdbcUtils.getDefaultJSONFormat())); // for json fields, deserialize them so they are treated as objects instead of strings. this is to // get around that formatJson doesn't handle deserializing them for us. 
jsonFieldNames.forEach(jsonFieldName -> ((ObjectNode) row).replace(jsonFieldName, Jsons.deserialize(row.get(jsonFieldName).asText()))); From 73b8589a2bb15296d10b91170e29d2cd94888a14 Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Wed, 3 Nov 2021 10:37:58 +0700 Subject: [PATCH 17/83] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Freshsales?= =?UTF-8?q?=20(#6963)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * initial commit * finish implementing full_refresh * add other tables * cleaning up * add docs and use requests_native_auth * fix function return different number of values * change package author * fix schema * fix schema bug * linting * fix unit test * clean up * add null for schemas * remove fc_widget_collaboration col --- .../source-freshsales/.dockerignore | 7 + .../connectors/source-freshsales/Dockerfile | 38 +++ .../connectors/source-freshsales/README.md | 132 ++++++++++ .../acceptance-test-config.yml | 20 ++ .../acceptance-test-docker.sh | 16 ++ .../connectors/source-freshsales/build.gradle | 14 + .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 14 + .../integration_tests/configured_catalog.json | 94 +++++++ .../integration_tests/integration_test.py | 8 + .../integration_tests/invalid_config.json | 1 + .../integration_tests/sample_config.json | 1 + .../connectors/source-freshsales/main.py | 13 + .../source-freshsales/requirements.txt | 2 + .../connectors/source-freshsales/setup.py | 29 +++ .../source_freshsales/__init__.py | 8 + .../source_freshsales/schemas/accounts.json | 67 +++++ .../schemas/completed_tasks.json | 21 ++ .../source_freshsales/schemas/contacts.json | 65 +++++ .../source_freshsales/schemas/lost_deals.json | 52 ++++ .../source_freshsales/schemas/open_deals.json | 52 ++++ .../source_freshsales/schemas/open_tasks.json | 21 ++ .../schemas/past_appointments.json | 29 +++ .../schemas/upcoming_appointments.json | 29 +++ .../source_freshsales/schemas/won_deals.json | 52 ++++ .../source_freshsales/source.py | 246 ++++++++++++++++++ .../source_freshsales/spec.json | 22 ++ .../source-freshsales/unit_tests/__init__.py | 3 + .../source-freshsales/unit_tests/conftest.py | 13 + .../unit_tests/test_source.py | 21 ++ 30 files changed, 1093 insertions(+) create mode 100644 airbyte-integrations/connectors/source-freshsales/.dockerignore create mode 100644 airbyte-integrations/connectors/source-freshsales/Dockerfile create mode 100644 airbyte-integrations/connectors/source-freshsales/README.md create mode 100644 airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-freshsales/build.gradle create mode 100644 airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py create mode 100644 airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-freshsales/main.py create mode 100644 
airbyte-integrations/connectors/source-freshsales/requirements.txt create mode 100644 airbyte-integrations/connectors/source-freshsales/setup.py create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py create mode 100644 airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json create mode 100644 airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py create mode 100644 airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py diff --git a/airbyte-integrations/connectors/source-freshsales/.dockerignore b/airbyte-integrations/connectors/source-freshsales/.dockerignore new file mode 100644 index 0000000000000..02bb719d4c3ee --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_freshsales +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-freshsales/Dockerfile b/airbyte-integrations/connectors/source-freshsales/Dockerfile new file mode 100644 index 0000000000000..d7e7bc9102319 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. 
+RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_freshsales ./source_freshsales + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-freshsales diff --git a/airbyte-integrations/connectors/source-freshsales/README.md b/airbyte-integrations/connectors/source-freshsales/README.md new file mode 100644 index 0000000000000..01cfa7382c1c7 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/README.md @@ -0,0 +1,132 @@ +# Freshsales Source + +This is the repository for the Freshsales source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/freshsales). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/freshsales) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_freshsales/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source freshsales test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-freshsales:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-freshsales:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-freshsales:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-freshsales:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-freshsales:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside the `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with Docker, run `./acceptance-test-docker.sh` from the connector root. + +### Using gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-freshsales:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work go in the `MAIN_REQUIREMENTS` list. +* required for testing go in the `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1.
Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml b/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml new file mode 100644 index 0000000000000..dfd392cb6e673 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-freshsales:dev +tests: + spec: + - spec_path: "source_freshsales/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-freshsales/build.gradle b/airbyte-integrations/connectors/source-freshsales/build.gradle new file mode 100644 index 0000000000000..097ec61b07f86 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_freshsales' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py b/airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py new file mode 100644 index 0000000000000..108075487440f --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..470df870dfdbb --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/configured_catalog.json @@ -0,0 +1,94 @@ +{ + "streams": [ + { + "stream": { + "name": "contacts", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "accounts", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "open_deals", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "won_deals", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "lost_deals", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "open_tasks", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "completed_tasks", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "past_appointments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "upcoming_appointments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py new file mode 100644 index 0000000000000..2824cd4a16cdb --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/integration_test.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +def test_dummy_test(): + """ this is the dummy test to pass integration tests step """ + pass diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..8b7f2e2e3e00d --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json @@ -0,0 +1 @@ +{"domain_name": "", "api_key": "ghiklmn"} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json new file mode 100644 index 0000000000000..1e106e9658f8e --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json @@ -0,0 +1 @@ +{"domain_name": "testabc.myfreshworks.com", "api_key": "ghiklmn"} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-freshsales/main.py b/airbyte-integrations/connectors/source-freshsales/main.py new file mode 100644 index 0000000000000..3f4d8a1f45c41 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_freshsales import SourceFreshsales + +if __name__ == "__main__": + source = SourceFreshsales() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-freshsales/requirements.txt b/airbyte-integrations/connectors/source-freshsales/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-freshsales/setup.py b/airbyte-integrations/connectors/source-freshsales/setup.py new file mode 100644 index 0000000000000..2539849d0c679 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_freshsales", + description="Source implementation for Freshsales.", + author="Tuan Nguyen", + author_email="anhtuan.nguyen@me.com", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py b/airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py new file mode 100644 index 0000000000000..9061e6659822c --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceFreshsales + +__all__ = ["SourceFreshsales"] diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json new file mode 100644 index 0000000000000..ec1193d483c39 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json @@ -0,0 +1,67 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "address": { "type": ["null", "string"] }, + "city": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] }, + "zipcode": { "type": ["null", "string"] }, + "country": { "type": ["null", "string"] }, + "industry_type_id": { "type": ["null", "integer"] }, + "business_type_id": { "type": ["null", "integer"] }, + "number_of_employees": { "type": ["null", "integer"] }, + "annual_revenue": { "type": ["null", "number"] }, + "website": { "type": ["null", "string"] }, + "phone": { "type": ["null", "string"] }, + "owner_id": { "type": ["null", "integer"] }, + "facebook": { "type": ["null", "string"] }, + "twitter": { "type": ["null", "string"] }, + "linkedin": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "parent_sales_account_id": { "type": ["null", "integer"] }, + "first_name": { "type": ["null", "string"] }, + "last_name": { "type": ["null", "string"] }, + "display_name": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "job_title": { "type": ["null", "string"] }, + "email": { "type": ["null", "string"] }, + "emails": { "type": ["null", "array"] }, + "time_zone": { "type": ["null", "string"] }, + "work_number": { "type": ["null", "string"] }, + "mobile_number": { "type": ["null", "string"] }, + "last_seen": { "type": ["null", "string"] }, + "lead_score": { "type": ["null", "integer"] }, + "last_contacted": { "type": ["null", "string"] }, + "open_deals_amount": { "type": ["null", "number"] }, + "won_deals_amount": { "type": ["null", "number"] }, + "links": { "type": ["null", "object"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "keyword": { "type": ["null", "string"] }, + "medium": { "type": ["null", "string"] }, + "last_contacted_mode": { "type": ["null", "string"] }, + "recent_note": { "type": ["null", "string"] }, + "won_deals_count": { "type": ["null", "integer"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_ids": { "type": ["null", "array"] }, + "open_deals_count": { "type": ["null", "integer"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "array"] }, + "external_id": { "type": ["null", "string"] }, + "work_email": { "type": ["null", "string"] }, + "subscription_status": { "type": ["null", "integer"] }, + "subscription_types": { "type": ["null", "string"] }, + "customer_fit": { "type": ["null", "string"] }, + "whatsapp_subscription_status": { "type": ["null", "string"] }, + 
"phone_numbers": { "type": ["null", "array"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json new file mode 100644 index 0000000000000..dfdcb18586e09 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/completed_tasks.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "due_date": { "type": ["null", "string"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { "type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "owner_id": { "type": ["null", "integer"] }, + "status": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "outcome_id": { "type": ["null", "integer"] }, + "task_type_id": { "type": ["null", "integer"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json new file mode 100644 index 0000000000000..cf32ac6abe1df --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json @@ -0,0 +1,65 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "first_name": { "type": ["null", "string"] }, + "last_name": { "type": ["null", "string"] }, + "subscription_status": { "type": ["null", "string"] }, + "job_title": { "type": ["null", "string"] }, + "email": { "type": ["null", "string"] }, + "emails": { "type": ["null", "string"] }, + "work_number": { "type": ["null", "string"] }, + "external_id": { "type": ["null", "string"] }, + "mobile_number": { "type": ["null", "string"] }, + "address": { "type": ["null", "string"] }, + "city": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] }, + "zipcode": { "type": ["null", "string"] }, + "country": { "type": ["null", "string"] }, + "sales_accounts": { "type": ["null", "array"] }, + "territory_id": { "type": ["null", "integer"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "subscription_types": { "type": ["null", "string"] }, + "medium": { "type": ["null", "string"] }, + "campaign_id": { "type": ["null", "integer"] }, + "keyword": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] }, + "facebook": { "type": ["null", "string"] }, + "twitter": { "type": ["null", "string"] }, + "linkedin": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "contact_status_id": { "type": ["null", "integer"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "lifecycle_stage_id": { "type": ["null", "integer"] }, + "display_name": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "last_seen": { "type": ["null", "string"] }, + "lead_score": { "type": ["null", "integer"] }, + "last_contacted": { "type": ["null", "string"] }, + "open_deals_amount": { 
"type": ["null", "number"] }, + "won_deals_amount": { "type": ["null", "number"] }, + "links": { "type": ["null", "object"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "last_contacted_mode": { "type": ["null", "string"] }, + "recent_note": { "type": ["null", "string"] }, + "won_deals_count": { "type": ["null", "integer"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_ids": { "type": ["null", "string"] }, + "open_deals_count": { "type": ["null", "integer"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "string"] }, + "work_email": { "type": ["null", "string"] }, + "customer_fit": { "type": ["null", "integer"] }, + "whatsapp_subscription_status": { "type": ["null", "integer"] }, + "phone_numbers": { "type": ["null", "array"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json new file mode 100644 index 0000000000000..625e5823b30db --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/lost_deals.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "amount": { "type": ["null", "number"] }, + "currency_id": { "type": ["null", "integer"] }, + "base_currency_amount": { "type": ["null", "number"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "deal_stage_id": { "type": ["null", "integer"] }, + "deal_reason_id": { "type": ["null", "integer"] }, + "deal_type_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "expected_close": { "type": ["null", "string"] }, + "closed_date": { "type": ["null", "string"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "campaign_id": { "type": ["null", "integer"] }, + "deal_product_id": { "type": ["null", "integer"] }, + "deal_payment_status_id": { "type": ["null", "integer"] }, + "probability": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "deal_pipeline_id": { "type": "integer" }, + "stage_updated_time": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "age": { "type": ["null", "integer"] }, + "links": { "type": ["null", "object"] }, + "recent_note": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_id": { "type": ["null", "integer"] }, + "upcoming_activities_time": { "type": ["null", "string"] }, + "collaboration": { "type": ["null", "object"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "expected_deal_value": { "type": ["null", "number"] }, + "is_deleted": { "type": ["null", "boolean"] }, + 
"team_user_ids": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "fc_widget_collaboration": { "type": ["null", "object"] }, + "forecast_category": { "type": ["null", "integer"] }, + "deal_prediction_last_updated_at": { "type": ["null", "string"] }, + "rotten_days": { "type": ["null", "integer"] }, + "has_products": { "type": ["null", "boolean"] }, + "products": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json new file mode 100644 index 0000000000000..625e5823b30db --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_deals.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "amount": { "type": ["null", "number"] }, + "currency_id": { "type": ["null", "integer"] }, + "base_currency_amount": { "type": ["null", "number"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "deal_stage_id": { "type": ["null", "integer"] }, + "deal_reason_id": { "type": ["null", "integer"] }, + "deal_type_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "expected_close": { "type": ["null", "string"] }, + "closed_date": { "type": ["null", "string"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "campaign_id": { "type": ["null", "integer"] }, + "deal_product_id": { "type": ["null", "integer"] }, + "deal_payment_status_id": { "type": ["null", "integer"] }, + "probability": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "deal_pipeline_id": { "type": "integer" }, + "stage_updated_time": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "age": { "type": ["null", "integer"] }, + "links": { "type": ["null", "object"] }, + "recent_note": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_id": { "type": ["null", "integer"] }, + "upcoming_activities_time": { "type": ["null", "string"] }, + "collaboration": { "type": ["null", "object"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "expected_deal_value": { "type": ["null", "number"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "fc_widget_collaboration": { "type": ["null", "object"] }, + "forecast_category": { "type": ["null", "integer"] }, + "deal_prediction_last_updated_at": { "type": ["null", "string"] }, + "rotten_days": { "type": ["null", "integer"] }, + "has_products": { "type": ["null", "boolean"] }, + "products": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json new file mode 100644 index 0000000000000..dfdcb18586e09 --- 
/dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/open_tasks.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "due_date": { "type": ["null", "string"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { "type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "owner_id": { "type": ["null", "integer"] }, + "status": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "outcome_id": { "type": ["null", "integer"] }, + "task_type_id": { "type": ["null", "integer"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json new file mode 100644 index 0000000000000..29117a8d9fdf4 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/past_appointments.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "from_date": { "type": ["null", "string"] }, + "date": { "type": ["null", "string"] }, + "Start": { "type": ["null", "string"] }, + "end_date": { "type": ["null", "string"] }, + "End": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { "type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "location": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "is_allday": { "type": ["null", "string"] }, + "appointment_attendees_attributes": { "type": ["null", "array"] }, + "outcome_id": { "type": ["null", "integer"] }, + "latitude": { "type": ["null", "string"] }, + "longitude": { "type": ["null", "string"] }, + "checkedin_at": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json new file mode 100644 index 0000000000000..29117a8d9fdf4 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/upcoming_appointments.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "from_date": { "type": ["null", "string"] }, + "date": { "type": ["null", "string"] }, + "Start": { "type": ["null", "string"] }, + "end_date": { "type": ["null", "string"] }, + "End": { "type": ["null", "string"] }, + "time_zone": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "creater_id": { "type": ["null", "integer"] }, + "targetable_id": { "type": ["null", "integer"] }, + "targetable_type": { 
"type": ["null", "string"] }, + "Possible": { "type": ["null", "string"] }, + "location": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "is_allday": { "type": ["null", "string"] }, + "appointment_attendees_attributes": { "type": ["null", "array"] }, + "outcome_id": { "type": ["null", "integer"] }, + "latitude": { "type": ["null", "string"] }, + "longitude": { "type": ["null", "string"] }, + "checkedin_at": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json new file mode 100644 index 0000000000000..625e5823b30db --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/won_deals.json @@ -0,0 +1,52 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["id"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "amount": { "type": ["null", "number"] }, + "currency_id": { "type": ["null", "integer"] }, + "base_currency_amount": { "type": ["null", "number"] }, + "sales_account_id": { "type": ["null", "integer"] }, + "deal_stage_id": { "type": ["null", "integer"] }, + "deal_reason_id": { "type": ["null", "integer"] }, + "deal_type_id": { "type": ["null", "integer"] }, + "owner_id": { "type": ["null", "integer"] }, + "expected_close": { "type": ["null", "string"] }, + "closed_date": { "type": ["null", "string"] }, + "lead_source_id": { "type": ["null", "integer"] }, + "campaign_id": { "type": ["null", "integer"] }, + "deal_product_id": { "type": ["null", "integer"] }, + "deal_payment_status_id": { "type": ["null", "integer"] }, + "probability": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"] }, + "territory_id": { "type": ["null", "integer"] }, + "deal_pipeline_id": { "type": "integer" }, + "stage_updated_time": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "age": { "type": ["null", "integer"] }, + "links": { "type": ["null", "object"] }, + "recent_note": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_id": { "type": ["null", "integer"] }, + "upcoming_activities_time": { "type": ["null", "string"] }, + "collaboration": { "type": ["null", "object"] }, + "last_assigned_at": { "type": ["null", "string"] }, + "tags": { "type": ["null", "array"] }, + "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, + "last_contacted_via_sales_activity": { "type": ["null", "string"] }, + "expected_deal_value": { "type": ["null", "number"] }, + "is_deleted": { "type": ["null", "boolean"] }, + "team_user_ids": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "fc_widget_collaboration": { "type": ["null", "object"] }, + "forecast_category": { "type": ["null", "integer"] }, + "deal_prediction_last_updated_at": { "type": ["null", "string"] }, + "rotten_days": { "type": ["null", "integer"] }, + "has_products": { "type": ["null", "boolean"] }, + "products": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py 
b/airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py new file mode 100644 index 0000000000000..974114398ffc6 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/source.py @@ -0,0 +1,246 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer + + +# Basic full refresh stream +class FreshsalesStream(HttpStream, ABC): + url_base = "https://{}/crm/sales/api/" + primary_key = "id" + order_field = "updated_at" + transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) + + def __init__(self, domain_name: str, **kwargs): + super().__init__(**kwargs) + self.url_base = self.url_base.format(domain_name) + self.domain_name = domain_name + self.page = 1 + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """ + There is no next page token in the respond so incrementing the page param until there is no new result + """ + list_result = response.json().get(self.object_name, []) + if list_result: + self.page += 1 + return self.page + else: + return None + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = {"page": self.page, "sort": self.order_field, "sort_type": "asc"} + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + records = json_response.get(self.object_name, []) if self.object_name is not None else json_response + yield from records + + def _get_filters(self) -> List: + """ + Some streams require a filter_id to be passed in. This function gets all available filters. + """ + filters_url = f"https://{self.domain_name}/crm/sales/api/{self.object_name}/filters" + auth = self.authenticator.get_auth_header() + + try: + r = requests.get(filters_url, headers=auth) + r.raise_for_status() + return r.json().get("filters") + except requests.exceptions.RequestException as e: + raise e + + def get_view_id(self): + """ + This function iterate over all available filters and get the relevant filter_id. 
+ """ + if hasattr(self, "filter_name"): + filters = self._get_filters() + return next(filter["id"] for filter in filters if filter["name"] == self.filter_name) + else: + return + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + view_id = self.get_view_id() + return f"{self.object_name}/view/{view_id}" + + +class Contacts(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#contacts + """ + + object_name = "contacts" + filter_name = "All Contacts" + + +class Accounts(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#accounts + """ + + object_name = "sales_accounts" + filter_name = "All Accounts" + + +class Deals(FreshsalesStream): + object_name = "deals" + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + records = json_response.get(self.object_name, []) if self.object_name is not None else json_response + # This is to remove data form widget development. Keeping this in failed integration tests. + for record in records: + record.pop("fc_widget_collaboration", None) + yield from records + + +class OpenDeals(Deals): + """ + API docs: https://developers.freshworks.com/crm/api/#deals + """ + + filter_name = "Open Deals" + + +class WonDeals(Deals): + """ + API docs: https://developers.freshworks.com/crm/api/#deals + """ + + filter_name = "Won Deals" + + +class LostDeals(Deals): + """ + API docs: https://developers.freshworks.com/crm/api/#deals + """ + + filter_name = "Lost Deals" + + +class OpenTasks(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#tasks + """ + + object_name = "tasks" + filter_value = "open" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +class CompletedTasks(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#tasks + """ + + object_name = "tasks" + filter_value = "completed" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +class PastAppointments(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#appointments + """ + + object_name = "appointments" + filter_value = "past" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) 
-> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +class UpcomingAppointments(FreshsalesStream): + """ + API docs: https://developers.freshworks.com/crm/api/#appointments + """ + + object_name = "appointments" + filter_value = "upcoming" + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.object_name}" + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = self.filter_value + return params + + +# Source +class SourceFreshsales(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + auth = TokenAuthenticator(token=f'token={config["api_key"]}', auth_method="Token").get_auth_header() + url = f'https://{config["domain_name"]}/crm/sales/api/contacts/filters' + try: + session = requests.get(url, headers=auth) + session.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = TokenAuthenticator(token=f'token={config["api_key"]}', auth_method="Token") + args = {"authenticator": auth, "domain_name": config["domain_name"]} + return [ + Contacts(**args), + Accounts(**args), + OpenDeals(**args), + WonDeals(**args), + LostDeals(**args), + OpenTasks(**args), + CompletedTasks(**args), + PastAppointments(**args), + UpcomingAppointments(**args), + ] diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json new file mode 100644 index 0000000000000..f4155198bc275 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/spec.json @@ -0,0 +1,22 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Freshsales Spec", + "type": "object", + "required": ["domain_name", "api_key"], + "additionalProperties": false, + "properties": { + "domain_name": { + "type": "string", + "description": "Freshsales domain", + "examples": ["mydomain.myfreshworks.com"] + }, + "api_key": { + "type": "string", + "description": "Your API Access Key. See here. The key is case sensitive.", + "airbyte_secret": true + } + } + } +} diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py new file mode 100644 index 0000000000000..d03c2820311d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/conftest.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
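Not part of the patch, but the request pattern the Freshsales stream classes above produce is easy to sketch with plain `requests`: resolve the named filter ("view") to an id, then walk the view page by page until an empty page comes back. The domain, API key, and filter name below are placeholders; the auth header mirrors the `TokenAuthenticator(token=f'token=...', auth_method="Token")` call in the source.

```python
import requests

DOMAIN = "example.myfreshworks.com"   # placeholder, not a real account
API_KEY = "xxxx"                      # placeholder
HEADERS = {"Authorization": f"Token token={API_KEY}"}


def fetch_all(object_name: str, filter_name: str):
    base = f"https://{DOMAIN}/crm/sales/api"
    # 1. Resolve the named filter to its view id, as _get_filters/get_view_id do.
    filters = requests.get(f"{base}/{object_name}/filters", headers=HEADERS).json()["filters"]
    view_id = next(f["id"] for f in filters if f["name"] == filter_name)
    # 2. The API exposes no next-page token, so keep incrementing `page`
    #    until a page comes back empty (same logic as next_page_token above).
    page = 1
    while True:
        resp = requests.get(
            f"{base}/{object_name}/view/{view_id}",
            params={"page": page, "sort": "updated_at", "sort_type": "asc"},
            headers=HEADERS,
        )
        resp.raise_for_status()
        records = resp.json().get(object_name, [])
        if not records:
            break
        yield from records
        page += 1


# e.g. list(fetch_all("contacts", "All Contacts"))
```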
+# + +import json + +import pytest + + +@pytest.fixture(scope="session", name="config") +def config_fixture(): + with open("secrets/config.json", "r") as config_file: + return json.load(config_file) diff --git a/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py b/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py new file mode 100644 index 0000000000000..132f3c417ad17 --- /dev/null +++ b/airbyte-integrations/connectors/source-freshsales/unit_tests/test_source.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_freshsales.source import SourceFreshsales + + +def test_check_connection(mocker, config): + source = SourceFreshsales() + logger_mock = MagicMock() + assert source.check_connection(logger_mock, config) == (True, None) + + +def test_count_streams(mocker): + source = SourceFreshsales() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 9 + assert len(streams) == expected_streams_number From c9c38e404a21f55c95e4154f3aaea8408ee042c1 Mon Sep 17 00:00:00 2001 From: Collin Scangarella Date: Tue, 2 Nov 2021 20:44:10 -0700 Subject: [PATCH 18/83] fixed bug which crashes okta log incremental sync (#7584) * fixed bug which crashes okta log incremental sync * bump connector version * revert to pendulum --- .../1d4fdb25-64fc-4569-92da-fcdca79a8372.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../integration_tests/configured_catalog.json | 11 ++++++++ .../source-okta/source_okta/source.py | 26 +++++++++++++++++-- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json index 46fde36888a3b..48997072d6514 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "1d4fdb25-64fc-4569-92da-fcdca79a8372", "name": "Okta", "dockerRepository": "airbyte/source-okta", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/sources/okta" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 11082ad6991b3..17d6acc05d129 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -349,7 +349,7 @@ - name: Okta sourceDefinitionId: 1d4fdb25-64fc-4569-92da-fcdca79a8372 dockerRepository: airbyte/source-okta - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/okta sourceType: api - name: OneSignal diff --git a/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json index a5fdac3293e30..7c58f625727a2 100644 --- a/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-okta/integration_tests/configured_catalog.json @@ -21,6 +21,17 @@ "destination_sync_mode": 
"overwrite", "cursor_field": ["lastUpdated"], "primary_key": [["id"]] + }, + { + "stream": { + "name": "logs", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["published"], + "primary_key": [["uuid"]] } ] } diff --git a/airbyte-integrations/connectors/source-okta/source_okta/source.py b/airbyte-integrations/connectors/source-okta/source_okta/source.py index 2bd410c9ae604..8f3a04ad4283a 100644 --- a/airbyte-integrations/connectors/source-okta/source_okta/source.py +++ b/airbyte-integrations/connectors/source-okta/source_okta/source.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod +from datetime import datetime from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple from urllib import parse @@ -90,9 +91,14 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late ) } - def request_params(self, stream_state=None, **kwargs): + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: stream_state = stream_state or {} - params = super().request_params(stream_state=stream_state, **kwargs) + params = super().request_params(stream_state, stream_slice, next_page_token) latest_entry = stream_state.get(self.cursor_field) if latest_entry: params["filter"] = f'{self.cursor_field} gt "{latest_entry}"' @@ -114,6 +120,22 @@ class Logs(IncrementalOktaStream): def path(self, **kwargs) -> str: return "logs" + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: + stream_state = stream_state or {} + params = { + "limit": self.page_size, + **(next_page_token or {}), + } + latest_entry = stream_state.get(self.cursor_field) + if latest_entry: + params["since"] = latest_entry + return params + class Users(IncrementalOktaStream): cursor_field = "lastUpdated" From 0e09ac3032771d1c8b00a4425bb72bfb1c79c72a Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Wed, 3 Nov 2021 01:06:38 -0300 Subject: [PATCH 19/83] Publish PR 6963: new source freshsales (#7585) * initial commit * finish implementing full_refresh * add other tables * cleaning up * add docs and use requests_native_auth * fix function return different number of values * change package author * fix schema * fix schema bug * linting * fix unit test * clean up * add null for schemas * remove fc_widget_collaboration col * change accpt test * add creds * run format * add config files * run format Co-authored-by: Tuan Nguyen --- .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + .../eca08d79-7b92-4065-b7f3-79c14836ebe7.json | 7 +++++ .../resources/seed/source_definitions.yaml | 6 +++++ ...ryptedOracleDestinationAcceptanceTest.java | 2 +- .../acceptance-test-config.yml | 2 +- .../integration_tests/invalid_config.json | 2 +- .../integration_tests/sample_config.json | 2 +- .../source_freshsales/schemas/accounts.json | 20 +++++++------- .../source_freshsales/schemas/contacts.json | 26 +++++++++---------- .../source-mixpanel/source_mixpanel/source.py | 1 - tools/bin/ci_credentials.sh | 1 + 12 files changed, 43 insertions(+), 28 deletions(-) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json diff --git 
a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 40a166ff64418..56ade97c3d252 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -199,6 +199,7 @@ jobs: SOURCE_DELIGHTED_TEST_CREDS: ${{ secrets.SOURCE_DELIGHTED_TEST_CREDS }} SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} + SOURCE_FRESHSALES_TEST_CREDS: ${{ secrets.SOURCE_FRESHSALES_TEST_CREDS }} - run: | echo "$SPEC_CACHE_SERVICE_ACCOUNT_KEY" > spec_cache_key_file.json && docker login -u airbytebot -p ${DOCKER_PASSWORD} ./tools/integrations/manage.sh publish airbyte-integrations/${{ github.event.inputs.connector }} ${{ github.event.inputs.run-tests }} --publish_spec_to_cache diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 674d8657388c1..94e50a3f3ae41 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -194,6 +194,7 @@ jobs: SOURCE_DELIGHTED_TEST_CREDS: ${{ secrets.SOURCE_DELIGHTED_TEST_CREDS }} SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} + SOURCE_FRESHSALES_TEST_CREDS: ${{ secrets.SOURCE_FRESHSALES_TEST_CREDS }} - run: | ./tools/bin/ci_integration_test.sh ${{ github.event.inputs.connector }} name: test ${{ github.event.inputs.connector }} diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json new file mode 100644 index 0000000000000..d74256e9d2816 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/eca08d79-7b92-4065-b7f3-79c14836ebe7.json @@ -0,0 +1,7 @@ +{ + "sourceDefinitionId": "eca08d79-7b92-4065-b7f3-79c14836ebe7", + "name": "Freshsales", + "dockerRepository": "airbyte/source-freshsales", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/freshsales" +} diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 17d6acc05d129..324edc44f9b49 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -156,6 +156,12 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/freshdesk icon: freshdesk.svg sourceType: api +- name: Freshsales + sourceDefinitionId: eca08d79-7b92-4065-b7f3-79c14836ebe7 + dockerRepository: airbyte/source-freshsales + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/freshsales + sourceType: api - name: Freshservice sourceDefinitionId: 9bb85338-ea95-4c93-b267-6be89125b267 dockerRepository: airbyte/source-freshservice diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java index 963e1277a2eec..8e57e31ef7ffc 100644 --- 
a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java @@ -13,8 +13,8 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Database; import io.airbyte.db.Databases; -import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.sql.SQLException; diff --git a/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml b/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml index dfd392cb6e673..d8678f3f24443 100644 --- a/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-freshsales/acceptance-test-config.yml @@ -14,7 +14,7 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" - empty_streams: [] + empty_streams: ["lost_deals", "won_deals"] full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json index 8b7f2e2e3e00d..31299549a84b8 100644 --- a/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/invalid_config.json @@ -1 +1 @@ -{"domain_name": "", "api_key": "ghiklmn"} \ No newline at end of file +{ "domain_name": "", "api_key": "ghiklmn" } diff --git a/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json index 1e106e9658f8e..690c924091122 100644 --- a/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json +++ b/airbyte-integrations/connectors/source-freshsales/integration_tests/sample_config.json @@ -1 +1 @@ -{"domain_name": "testabc.myfreshworks.com", "api_key": "ghiklmn"} \ No newline at end of file +{ "domain_name": "testabc.myfreshworks.com", "api_key": "ghiklmn" } diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json index ec1193d483c39..d43ae83240c8d 100644 --- a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/accounts.json @@ -24,21 +24,21 @@ "created_at": { "type": ["null", "string"] }, "updated_at": { "type": ["null", "string"] }, "parent_sales_account_id": { "type": ["null", "integer"] }, - "first_name": { "type": ["null", "string"] }, - "last_name": { "type": ["null", "string"] }, + "first_name": { "type": ["null", "string"] }, + "last_name": { "type": ["null", "string"] }, "display_name": { "type": ["null", "string"] }, - "avatar": { "type": ["null", "string"] }, - "job_title": { "type": ["null", "string"] }, - "email": { 
"type": ["null", "string"] }, - "emails": { "type": ["null", "array"] }, - "time_zone": { "type": ["null", "string"] }, - "work_number": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "job_title": { "type": ["null", "string"] }, + "email": { "type": ["null", "string"] }, + "emails": { "type": ["null", "array"] }, + "time_zone": { "type": ["null", "string"] }, + "work_number": { "type": ["null", "string"] }, "mobile_number": { "type": ["null", "string"] }, - "last_seen": { "type": ["null", "string"] }, + "last_seen": { "type": ["null", "string"] }, "lead_score": { "type": ["null", "integer"] }, "last_contacted": { "type": ["null", "string"] }, "open_deals_amount": { "type": ["null", "number"] }, - "won_deals_amount": { "type": ["null", "number"] }, + "won_deals_amount": { "type": ["null", "number"] }, "links": { "type": ["null", "object"] }, "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, "custom_field": { "type": ["null", "object"] }, diff --git a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json index cf32ac6abe1df..e7cc9ffc16273 100644 --- a/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json +++ b/airbyte-integrations/connectors/source-freshsales/source_freshsales/schemas/contacts.json @@ -36,28 +36,28 @@ "sales_account_id": { "type": ["null", "integer"] }, "lifecycle_stage_id": { "type": ["null", "integer"] }, "display_name": { "type": ["null", "string"] }, - "avatar": { "type": ["null", "string"] }, - "last_seen": { "type": ["null", "string"] }, + "avatar": { "type": ["null", "string"] }, + "last_seen": { "type": ["null", "string"] }, "lead_score": { "type": ["null", "integer"] }, - "last_contacted": { "type": ["null", "string"] }, - "open_deals_amount": { "type": ["null", "number"] }, - "won_deals_amount": { "type": ["null", "number"] }, + "last_contacted": { "type": ["null", "string"] }, + "open_deals_amount": { "type": ["null", "number"] }, + "won_deals_amount": { "type": ["null", "number"] }, "links": { "type": ["null", "object"] }, "last_contacted_sales_activity_mode": { "type": ["null", "string"] }, - "custom_field": { "type": ["null", "object"] }, - "last_contacted_mode": { "type": ["null", "string"] }, - "recent_note": { "type": ["null", "string"] }, + "custom_field": { "type": ["null", "object"] }, + "last_contacted_mode": { "type": ["null", "string"] }, + "recent_note": { "type": ["null", "string"] }, "won_deals_count": { "type": ["null", "integer"] }, "last_contacted_via_sales_activity": { "type": ["null", "string"] }, - "completed_sales_sequences": { "type": ["null", "string"] }, - "active_sales_sequences": { "type": ["null", "string"] }, - "web_form_ids": { "type": ["null", "string"] }, + "completed_sales_sequences": { "type": ["null", "string"] }, + "active_sales_sequences": { "type": ["null", "string"] }, + "web_form_ids": { "type": ["null", "string"] }, "open_deals_count": { "type": ["null", "integer"] }, "last_assigned_at": { "type": ["null", "string"] }, "tags": { "type": ["null", "array"] }, - "is_deleted": { "type": ["null", "boolean"] }, + "is_deleted": { "type": ["null", "boolean"] }, "team_user_ids": { "type": ["null", "string"] }, - "work_email": { "type": ["null", "string"] }, + "work_email": { "type": ["null", "string"] }, "customer_fit": { "type": ["null", "integer"] }, "whatsapp_subscription_status": { "type": ["null", "integer"] }, 
"phone_numbers": { "type": ["null", "array"] } diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py index 894301292adc7..7c3916d342cbf 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py @@ -729,7 +729,6 @@ def process_response(self, response: requests.Response, **kwargs) -> Iterable[Ma yield item - def get_json_schema(self) -> Mapping[str, Any]: """ :return: A dict of the JSON schema representing this stream. diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 9b1dd92577a4f..03e35719baae4 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -74,6 +74,7 @@ write_standard_creds source-file "$AZURE_STORAGE_INTEGRATION_TEST_CREDS" "azblob write_standard_creds source-file "$FILE_SECURE_HTTPS_TEST_CREDS" write_standard_creds source-file-secure "$FILE_SECURE_HTTPS_TEST_CREDS" write_standard_creds source-freshdesk "$FRESHDESK_TEST_CREDS" +write_standard_creds source-freshsales "$SOURCE_FRESHSALES_TEST_CREDS" write_standard_creds source-freshservice "$SOURCE_FRESHSERVICE_TEST_CREDS" write_standard_creds source-facebook-marketing "$FACEBOOK_MARKETING_TEST_INTEGRATION_CREDS" write_standard_creds source-facebook-pages "$FACEBOOK_PAGES_INTEGRATION_TEST_CREDS" From dd86fa7178503597f9af937f88be0fc4224829c3 Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Wed, 3 Nov 2021 01:36:46 -0300 Subject: [PATCH 20/83] Test PR 7584: Okta source incremental streams (#7586) * fixed bug which crashes okta log incremental sync * bump connector version * update acceptance test * rollback pendulum function * add comment * bump version * run format Co-authored-by: collin --- .../1d4fdb25-64fc-4569-92da-fcdca79a8372.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-integrations/connectors/source-okta/Dockerfile | 2 +- .../connectors/source-okta/acceptance-test-config.yml | 1 + .../connectors/source-okta/source_okta/source.py | 4 +++- docs/integrations/sources/okta.md | 1 + 6 files changed, 8 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json index 48997072d6514..34f0a63393b41 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/1d4fdb25-64fc-4569-92da-fcdca79a8372.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "1d4fdb25-64fc-4569-92da-fcdca79a8372", "name": "Okta", "dockerRepository": "airbyte/source-okta", - "dockerImageTag": "0.1.3", + "dockerImageTag": "0.1.4", "documentationUrl": "https://docs.airbyte.io/integrations/sources/okta" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 324edc44f9b49..3a715c5dcd57a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -355,7 +355,7 @@ - name: Okta sourceDefinitionId: 1d4fdb25-64fc-4569-92da-fcdca79a8372 dockerRepository: airbyte/source-okta - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 
documentationUrl: https://docs.airbyte.io/integrations/sources/okta sourceType: api - name: OneSignal diff --git a/airbyte-integrations/connectors/source-okta/Dockerfile b/airbyte-integrations/connectors/source-okta/Dockerfile index 5dc6182789386..1c554912b8906 100644 --- a/airbyte-integrations/connectors/source-okta/Dockerfile +++ b/airbyte-integrations/connectors/source-okta/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-okta diff --git a/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml b/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml index 16df00be02454..1e60fe03ef3a0 100644 --- a/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-okta/acceptance-test-config.yml @@ -12,6 +12,7 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["logs"] full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-okta/source_okta/source.py b/airbyte-integrations/connectors/source-okta/source_okta/source.py index 8f3a04ad4283a..c39afa0eac49f 100644 --- a/airbyte-integrations/connectors/source-okta/source_okta/source.py +++ b/airbyte-integrations/connectors/source-okta/source_okta/source.py @@ -4,7 +4,6 @@ from abc import ABC, abstractmethod -from datetime import datetime from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple from urllib import parse @@ -114,6 +113,7 @@ def path(self, **kwargs) -> str: class Logs(IncrementalOktaStream): + cursor_field = "published" primary_key = "uuid" @@ -126,6 +126,8 @@ def request_params( stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: + # The log stream use a different params to get data + # https://developer.okta.com/docs/reference/api/system-log/#datetime-filter stream_state = stream_state or {} params = { "limit": self.page_size, diff --git a/docs/integrations/sources/okta.md b/docs/integrations/sources/okta.md index c459e0b77159a..99e07056785b6 100644 --- a/docs/integrations/sources/okta.md +++ b/docs/integrations/sources/okta.md @@ -59,6 +59,7 @@ Different Okta APIs require different admin privilege levels. 
API tokens inherit | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.4 | 2021-11-02 | [7584](https://github.com/airbytehq/airbyte/pull/7584) | Fix incremental params for log stream | | 0.1.3 | 2021-09-08 | [5905](https://github.com/airbytehq/airbyte/pull/5905) | Fix incremental stream defect | | 0.1.2 | 2021-07-01 | [4456](https://github.com/airbytehq/airbyte/pull/4456) | Bugfix infinite pagination in logs stream | | 0.1.1 | 2021-06-09 | [3937](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` env variable for kubernetes support | From a534bb2a8f29b20e3cc7c52fef1bc3c34783695d Mon Sep 17 00:00:00 2001 From: Lake Mossman Date: Tue, 2 Nov 2021 22:03:50 -0700 Subject: [PATCH 21/83] Generate seed connector specs on build (#7501) * add specs module with logic to fetch specs on build * format + build and add gradle dependency for new script * check seed file for existing specs + refactor * add tests + a bit more refactoring * run gw format * update yaml config persistence to merge specs into definitions * add comment * delete secrets migration to be consistent with master * add dep * add tests for GcsBucketSpecFetcher * get rid of static block + format * DRY up parse call * add GCS details to comment * formatting + fix test * update comment * do not format seed specs files * change signature of run to allow cloud to reuse this script * run gw format * revert commits that change signature of run * fix comment typo Co-authored-by: Davin Chia * rename enum to be distinct from the enum in cloud * add missing dependencies between modules * add readme for seed connector spec generator * reword * reference readme in comment * ignore 'spec' field in newFields logic Co-authored-by: Davin Chia --- .../java/io/airbyte/config/init/SeedType.java | 4 +- .../resources/seed/destination_specs.yaml | 2752 ++++++++ .../src/main/resources/seed/source_specs.yaml | 5836 +++++++++++++++++ airbyte-config/models/build.gradle | 3 +- .../main/resources/types/DockerImageSpec.yaml | 16 + .../DatabaseConfigPersistence.java | 9 +- .../YamlSeedConfigPersistence.java | 40 +- .../YamlSeedConfigPersistenceTest.java | 9 +- airbyte-config/specs/README.md | 16 + airbyte-config/specs/build.gradle | 24 + .../config/specs/GcsBucketSpecFetcher.java | 70 + .../specs/SeedConnectorSpecGenerator.java | 127 + .../config/specs/SeedConnectorType.java | 33 + .../specs/GcsBucketSpecFetcherTest.java | 79 + .../specs/SeedConnectorSpecGeneratorTest.java | 154 + airbyte-json-validation/build.gradle | 2 + airbyte-protocol/models/build.gradle | 2 + airbyte-scheduler/client/build.gradle | 1 + .../BucketSpecCacheSchedulerClient.java | 58 +- .../BucketSpecCacheSchedulerClientTest.java | 14 +- airbyte-server/build.gradle | 1 + build.gradle | 3 +- settings.gradle | 1 + 23 files changed, 9185 insertions(+), 69 deletions(-) create mode 100644 airbyte-config/init/src/main/resources/seed/destination_specs.yaml create mode 100644 airbyte-config/init/src/main/resources/seed/source_specs.yaml create mode 100644 airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml create mode 100644 airbyte-config/specs/README.md create mode 100644 airbyte-config/specs/build.gradle create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java create mode 100644 
airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java create mode 100644 airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java diff --git a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java index 47c4c419bcf10..3730369621090 100644 --- a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java +++ b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java @@ -7,7 +7,9 @@ public enum SeedType { STANDARD_SOURCE_DEFINITION("/seed/source_definitions.yaml", "sourceDefinitionId"), - STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"); + STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"), + SOURCE_SPEC("/seed/source_specs.yaml", "dockerImage"), + DESTINATION_SPEC("/seed/destination_specs.yaml", "dockerImage"); final String resourcePath; // ID field name diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml new file mode 100644 index 0000000000000..a248b4eff240c --- /dev/null +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -0,0 +1,2752 @@ +# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. +# Do NOT edit this file directly. See generator class for more details. +--- +- dockerImage: "airbyte/destination-azure-blob-storage:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "AzureBlobStorage Destination Spec" + type: "object" + required: + - "azure_blob_storage_account_name" + - "azure_blob_storage_account_key" + - "format" + additionalProperties: false + properties: + azure_blob_storage_endpoint_domain_name: + title: "Endpoint Domain Name" + type: "string" + default: "blob.core.windows.net" + description: "This is Azure Blob Storage endpoint domain name. Leave default\ + \ value (or leave it empty if run container from command line) to use\ + \ Microsoft native from example." + examples: + - "blob.core.windows.net" + azure_blob_storage_container_name: + title: "Azure blob storage container (Bucket) Name" + type: "string" + description: "The name of the Azure blob storage container. If not exists\ + \ - will be created automatically. May be empty, then will be created\ + \ automatically airbytecontainer+timestamp" + examples: + - "airbytetescontainername" + azure_blob_storage_account_name: + title: "Azure Blob Storage account name" + type: "string" + description: "The account's name of the Azure Blob Storage." + examples: + - "airbyte5storage" + azure_blob_storage_account_key: + description: "The Azure blob storage account key." + airbyte_secret: true + type: "string" + examples: + - "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd==" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + const: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." 
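The generated seed files pair each `dockerImage` (repository:tag) with that connector's full `spec`, so the seed persistence layer can attach a spec to every definition without running the connector. The actual merge lives in the Java `YamlSeedConfigPersistence`; the following is only a rough Python equivalent of that join, assuming PyYAML and the seed file paths used in this patch.

```python
import yaml

SEED = "airbyte-config/init/src/main/resources/seed"

with open(f"{SEED}/destination_definitions.yaml") as f:
    definitions = yaml.safe_load(f)
with open(f"{SEED}/destination_specs.yaml") as f:
    specs = yaml.safe_load(f)

# destination_specs.yaml keys each entry by "repository:tag"; join definitions to specs on that key.
specs_by_image = {entry["dockerImage"]: entry["spec"] for entry in specs}

merged = []
for definition in definitions:
    image = f'{definition["dockerRepository"]}:{definition["dockerImageTag"]}'
    # A KeyError here would mean a definition has no generated spec in the seed file.
    merged.append({**definition, "spec": specs_by_image[image]})

print(f"{len(merged)} destination definitions now carry an embedded spec")
```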
+ default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + const: "JSONL" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-bigquery:0.5.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Destination Spec" + type: "object" + required: + - "project_id" + - "dataset_id" + additionalProperties: true + properties: + big_query_client_buffer_size_mb: + title: "Google BigQuery client chunk size" + description: "Google BigQuery client's chunk(buffer) size (MIN=1, MAX =\ + \ 15) for each table. The default 15MiB value is used if not set explicitly.\ + \ It's recommended to decrease value for big data sets migration for less\ + \ HEAP memory consumption and avoiding crashes. For more details refer\ + \ to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html" + type: "integer" + minimum: 1 + maximum: 15 + default: 15 + examples: + - "15" + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "Default BigQuery Dataset ID tables are replicated to if the\ + \ source does not specify a namespace." + title: "Default Dataset ID" + dataset_location: + type: "string" + description: "The location of the dataset. Warning: Changes made after creation\ + \ will not be applied." + title: "Dataset Location" + default: "US" + enum: + - "US" + - "EU" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-southeast1" + - "asia-southeast2" + - "australia-southeast1" + - "europe-central1" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west5" + - "europe-west6" + - "northamerica-northeast1" + - "southamerica-east1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west-1" + - "us-west-2" + - "us-west-3" + - "us-west-4" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key. Default credentials will\ + \ be used if this field is left empty." + title: "Credentials JSON" + airbyte_secret: true + transformation_priority: + type: "string" + description: "When running custom transformations or Basic normalization,\ + \ running queries on interactive mode can hit BQ limits, choosing batch\ + \ will solve those limitss." + title: "Transformation Query Run Type" + default: "interactive" + enum: + - "interactive" + - "batch" + loading_method: + type: "object" + title: "Loading Method" + description: "Loading method used to send select the way data will be uploaded\ + \ to BigQuery." + oneOf: + - title: "Standard Inserts" + additionalProperties: false + description: "Direct uploading using streams." + required: + - "method" + properties: + method: + type: "string" + const: "Standard" + - title: "GCS Staging" + additionalProperties: false + description: "Writes large batches of records to a file, uploads the file\ + \ to GCS, then uses
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "gcs_bucket_name" + - "gcs_bucket_path" + - "credential" + properties: + method: + type: "string" + const: "GCS Staging" + gcs_bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the GCS bucket." + examples: + - "airbyte_sync" + gcs_bucket_path: + description: "Directory under the GCS bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + keep_files_in_gcs-bucket: + type: "string" + description: "This upload method is supposed to temporary store records\ + \ in GCS bucket. What do you want to do with data in GCS bucket\ + \ when migration has finished?" + title: "GCS tmp files afterward processing" + default: "Delete all tmp files from GCS" + enum: + - "Delete all tmp files from GCS" + - "Keep all tmp files in GCS" + credential: + title: "Credential" + type: "object" + oneOf: + - title: "HMAC key" + required: + - "credential_type" + - "hmac_key_access_id" + - "hmac_key_secret" + properties: + credential_type: + type: "string" + const: "HMAC_KEY" + hmac_key_access_id: + type: "string" + description: "HMAC key access ID. When linked to a service account,\ + \ this ID is 61 characters long; when linked to a user account,\ + \ it is 24 characters long." + title: "HMAC Key Access ID" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234" + hmac_key_secret: + type: "string" + description: "The corresponding secret for the access ID. It\ + \ is a 40-character base-64 encoded string." + title: "HMAC Key Secret" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234567890ABCDEFGHIJ" + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.1.7" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Denormalized Typed Struct Destination Spec" + type: "object" + required: + - "project_id" + - "dataset_id" + additionalProperties: true + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "Default BigQuery Dataset ID tables are replicated to if the\ + \ source does not specify a namespace." + title: "Default Dataset ID" + dataset_location: + type: "string" + description: "The location of the dataset. Warning: Changes made after creation\ + \ will not be applied." + title: "Dataset Location" + default: "US" + enum: + - "US" + - "EU" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-southeast1" + - "asia-southeast2" + - "australia-southeast1" + - "europe-central1" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west5" + - "europe-west6" + - "northamerica-northeast1" + - "southamerica-east1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west-1" + - "us-west-2" + - "us-west-3" + - "us-west-4" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key. 
Default credentials will\ + \ be used if this field is left empty." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-keen:0.2.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/keen" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Keen Spec" + type: "object" + required: + - "project_id" + - "api_key" + additionalProperties: false + properties: + project_id: + description: "Keen Project ID" + type: "string" + examples: + - "58b4acc22ba938934e888322e" + api_key: + title: "API Key" + description: "Keen Master API key" + type: "string" + examples: + - "ABCDEFGHIJKLMNOPRSTUWXYZ" + airbyte_secret: true + infer_timestamp: + title: "Infer Timestamp" + description: "Allow connector to guess keen.timestamp value based on the\ + \ streamed data" + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-dynamodb:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "DynamoDB Destination Spec" + type: "object" + required: + - "dynamodb_table_name" + - "dynamodb_region" + - "access_key_id" + - "secret_access_key" + additionalProperties: false + properties: + dynamodb_endpoint: + title: "Endpoint" + type: "string" + default: "" + description: "This is your DynamoDB endpoint url.(if you are working with\ + \ AWS DynamoDB, just leave empty)." + examples: + - "http://localhost:9000" + dynamodb_table_name: + title: "DynamoDB Table Name" + type: "string" + description: "The name of the DynamoDB table." + examples: + - "airbyte_sync" + dynamodb_region: + title: "DynamoDB Region" + type: "string" + default: "" + description: "The region of the DynamoDB." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + - "us-gov-east-1" + - "us-gov-west-1" + access_key_id: + type: "string" + description: "The access key id to access the DynamoDB. Airbyte requires\ + \ Read and Write permissions to the DynamoDB." + title: "DynamoDB Key Id" + airbyte_secret: true + examples: + - "A012345678910EXAMPLE" + secret_access_key: + type: "string" + description: "The corresponding secret to the access key id." 
+ title: "DynamoDB Access Key" + airbyte_secret: true + examples: + - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-gcs:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "GCS Destination Spec" + type: "object" + required: + - "gcs_bucket_name" + - "gcs_bucket_path" + - "gcs_bucket_region" + - "credential" + - "format" + additionalProperties: false + properties: + gcs_bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the GCS bucket." + examples: + - "airbyte_sync" + gcs_bucket_path: + description: "Directory under the GCS bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + gcs_bucket_region: + title: "GCS Bucket Region" + type: "string" + default: "" + description: "The region of the GCS bucket." + enum: + - "" + - "-- North America --" + - "northamerica-northeast1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west1" + - "us-west2" + - "us-west3" + - "us-west4" + - "-- South America --" + - "southamerica-east1" + - "-- Europe --" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west6" + - "-- Asia --" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-south2" + - "asia-southeast1" + - "asia-southeast2" + - "-- Australia --" + - "australia-southeast1" + - "australia-southeast2" + - "-- Multi-regions --" + - "asia" + - "eu" + - "us" + - "-- Dual-regions --" + - "asia1" + - "eur4" + - "nam4" + credential: + title: "Credential" + type: "object" + oneOf: + - title: "HMAC key" + required: + - "credential_type" + - "hmac_key_access_id" + - "hmac_key_secret" + properties: + credential_type: + type: "string" + enum: + - "HMAC_KEY" + default: "HMAC_KEY" + hmac_key_access_id: + type: "string" + description: "HMAC key access ID. When linked to a service account,\ + \ this ID is 61 characters long; when linked to a user account,\ + \ it is 24 characters long." + title: "HMAC Key Access ID" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234" + hmac_key_secret: + type: "string" + description: "The corresponding secret for the access ID. It is a\ + \ 40-character base-64 encoded string." + title: "HMAC Key Secret" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234567890ABCDEFGHIJ" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "Avro: Apache Avro" + required: + - "format_type" + - "compression_codec" + properties: + format_type: + type: "string" + enum: + - "Avro" + default: "Avro" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data. Default\ + \ to no compression." + type: "object" + oneOf: + - title: "no compression" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "no compression" + default: "no compression" + - title: "Deflate" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "Deflate" + default: "Deflate" + compression_level: + title: "Deflate level" + description: "0: no compression & fastest, 9: best compression\ + \ & slowest." 
+ type: "integer" + default: 0 + minimum: 0 + maximum: 9 + - title: "bzip2" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "bzip2" + default: "bzip2" + - title: "xz" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "xz" + default: "xz" + compression_level: + title: "Compression level" + description: "See here for details." + type: "integer" + default: 6 + minimum: 0 + maximum: 9 + - title: "zstandard" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "zstandard" + default: "zstandard" + compression_level: + title: "Compression level" + description: "Negative levels are 'fast' modes akin to lz4 or\ + \ snappy, levels above 9 are generally for archival purposes,\ + \ and levels above 18 use a lot of memory." + type: "integer" + default: 3 + minimum: -5 + maximum: 22 + include_checksum: + title: "Include checksum" + description: "If true, include a checksum with each data block." + type: "boolean" + default: false + - title: "snappy" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "snappy" + default: "snappy" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + enum: + - "CSV" + default: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." + default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "JSONL" + default: "JSONL" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "Parquet: Columnar Storage" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "Parquet" + default: "Parquet" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data pages." 
+ type: "string" + enum: + - "UNCOMPRESSED" + - "SNAPPY" + - "GZIP" + - "LZO" + - "BROTLI" + - "LZ4" + - "ZSTD" + default: "UNCOMPRESSED" + block_size_mb: + title: "Block Size (Row Group Size) (MB)" + description: "This is the size of a row group being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will improve\ + \ the IO when reading, but consume more memory when writing. Default:\ + \ 128 MB." + type: "integer" + default: 128 + examples: + - 128 + max_padding_size_mb: + title: "Max Padding Size (MB)" + description: "Maximum size allowed as padding to align row groups.\ + \ This is also the minimum size of a row group. Default: 8 MB." + type: "integer" + default: 8 + examples: + - 8 + page_size_kb: + title: "Page Size (KB)" + description: "The page size is for compression. A block is composed\ + \ of pages. A page is the smallest unit that must be read fully\ + \ to access a single record. If this value is too small, the compression\ + \ will deteriorate. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_page_size_kb: + title: "Dictionary Page Size (KB)" + description: "There is one dictionary page per column per row group\ + \ when dictionary encoding is used. The dictionary page size works\ + \ like the page size but for dictionary. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_encoding: + title: "Dictionary Encoding" + description: "Default: true." + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + $schema: "http://json-schema.org/draft-07/schema#" +- dockerImage: "airbyte/destination-pubsub:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/pubsub" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google PubSub Destination Spec" + type: "object" + required: + - "project_id" + - "topic_id" + - "credentials_json" + additionalProperties: true + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target PubSub" + title: "Project ID" + topic_id: + type: "string" + description: "PubSub topic ID in the given GCP project ID" + title: "PubSub Topic ID" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key." 
+ title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-kafka:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/kafka" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Kafka Destination Spec" + type: "object" + required: + - "bootstrap_servers" + - "topic_pattern" + - "protocol" + - "acks" + - "enable_idempotence" + - "compression_type" + - "batch_size" + - "linger_ms" + - "max_in_flight_requests_per_connection" + - "client_dns_lookup" + - "buffer_memory" + - "max_request_size" + - "retries" + - "socket_connection_setup_timeout_ms" + - "socket_connection_setup_timeout_max_ms" + - "max_block_ms" + - "request_timeout_ms" + - "delivery_timeout_ms" + - "send_buffer_bytes" + - "receive_buffer_bytes" + additionalProperties: true + properties: + bootstrap_servers: + title: "Bootstrap servers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Kafka cluster. The client will make use of all servers\ + \ irrespective of which servers are specified here for bootstrapping—this\ + \ list only impacts the initial hosts used to discover the full set of\ + \ servers. This list should be in the form host1:port1,host2:port2,....\ + \ Since these servers are just used for the initial connection to discover\ + \ the full cluster membership (which may change dynamically), this list\ + \ need not contain the full set of servers (you may want more than one,\ + \ though, in case a server is down)." + type: "string" + examples: + - "kafka-broker1:9092,kafka-broker2:9092" + topic_pattern: + title: "Topic pattern" + description: "Topic pattern in which the records will be sent. You can use\ + \ patterns like '{namespace}' and/or '{stream}' to send the message to\ + \ a specific topic based on these values. Notice that the topic name will\ + \ be transformed to a standard naming convention." + type: "string" + examples: + - "sample.topic" + - "{namespace}.{stream}.sample" + test_topic: + title: "Test topic" + description: "Topic to test if Airbyte can produce messages." + type: "string" + examples: + - "test.topic" + sync_producer: + title: "Sync producer" + description: "Wait synchronously until the record has been sent to Kafka." + type: "boolean" + default: false + protocol: + title: "Protocol" + type: "object" + description: "Protocol used to communicate with brokers." + oneOf: + - title: "PLAINTEXT" + required: + - "security_protocol" + properties: + security_protocol: + type: "string" + enum: + - "PLAINTEXT" + default: "PLAINTEXT" + - title: "SASL PLAINTEXT" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_PLAINTEXT" + default: "SASL_PLAINTEXT" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "PLAIN" + enum: + - "PLAIN" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." 
+ type: "string" + default: "" + airbyte_secret: true + - title: "SASL SSL" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_SSL" + default: "SASL_SSL" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "GSSAPI" + enum: + - "GSSAPI" + - "OAUTHBEARER" + - "SCRAM-SHA-256" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + client_id: + title: "Client ID" + description: "An id string to pass to the server when making requests. The\ + \ purpose of this is to be able to track the source of requests beyond\ + \ just ip/port by allowing a logical application name to be included in\ + \ server-side request logging." + type: "string" + examples: + - "airbyte-producer" + acks: + title: "ACKs" + description: "The number of acknowledgments the producer requires the leader\ + \ to have received before considering a request complete. This controls\ + \ the durability of records that are sent." + type: "string" + default: "1" + enum: + - "0" + - "1" + - "all" + enable_idempotence: + title: "Enable idempotence" + description: "When set to 'true', the producer will ensure that exactly\ + \ one copy of each message is written in the stream. If 'false', producer\ + \ retries due to broker failures, etc., may write duplicates of the retried\ + \ message in the stream." + type: "boolean" + default: false + compression_type: + title: "Compression type" + description: "The compression type for all data generated by the producer." + type: "string" + default: "none" + enum: + - "none" + - "gzip" + - "snappy" + - "lz4" + - "zstd" + batch_size: + title: "Batch size" + description: "The producer will attempt to batch records together into fewer\ + \ requests whenever multiple records are being sent to the same partition." + type: "integer" + examples: + - 16384 + linger_ms: + title: "Linger ms" + description: "The producer groups together any records that arrive in between\ + \ request transmissions into a single batched request." + type: "string" + examples: + - 0 + max_in_flight_requests_per_connection: + title: "Max in flight requests per connection" + description: "The maximum number of unacknowledged requests the client will\ + \ send on a single connection before blocking." + type: "integer" + examples: + - 5 + client_dns_lookup: + title: "Client DNS lookup" + description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ + \ connect to each returned IP address in sequence until a successful connection\ + \ is established. After a disconnection, the next IP is used. Once all\ + \ IPs have been used once, the client resolves the IP(s) from the hostname\ + \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ + \ bootstrap address into a list of canonical names. After the bootstrap\ + \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ + \ attempt to connect to the first IP address returned by the lookup, even\ + \ if the lookup returns multiple IP addresses." 
+ type: "string" + default: "use_all_dns_ips" + enum: + - "default" + - "use_all_dns_ips" + - "resolve_canonical_bootstrap_servers_only" + - "use_all_dns_ips" + buffer_memory: + title: "Buffer memory" + description: "The total bytes of memory the producer can use to buffer records\ + \ waiting to be sent to the server." + type: "string" + examples: 33554432 + max_request_size: + title: "Max request size" + description: "The maximum size of a request in bytes." + type: "integer" + examples: + - 1048576 + retries: + title: "Retries" + description: "Setting a value greater than zero will cause the client to\ + \ resend any record whose send fails with a potentially transient error." + type: "integer" + examples: + - 2147483647 + socket_connection_setup_timeout_ms: + title: "Socket connection setup timeout" + description: "The amount of time the client will wait for the socket connection\ + \ to be established." + type: "string" + examples: + - 10000 + socket_connection_setup_timeout_max_ms: + title: "Socket connection setup max timeout" + description: "The maximum amount of time the client will wait for the socket\ + \ connection to be established. The connection setup timeout will increase\ + \ exponentially for each consecutive connection failure up to this maximum." + type: "string" + examples: + - 30000 + max_block_ms: + title: "Max block ms" + description: "The configuration controls how long the KafkaProducer's send(),\ + \ partitionsFor(), initTransactions(), sendOffsetsToTransaction(), commitTransaction()\ + \ and abortTransaction() methods will block." + type: "string" + examples: + - 60000 + request_timeout_ms: + title: "Request timeout" + description: "The configuration controls the maximum amount of time the\ + \ client will wait for the response of a request. If the response is not\ + \ received before the timeout elapses the client will resend the request\ + \ if necessary or fail the request if retries are exhausted." + type: "integer" + examples: + - 30000 + delivery_timeout_ms: + title: "Delivery timeout" + description: "An upper bound on the time to report success or failure after\ + \ a call to 'send()' returns." + type: "integer" + examples: + - 120000 + send_buffer_bytes: + title: "Send buffer bytes" + description: "The size of the TCP send buffer (SO_SNDBUF) to use when sending\ + \ data. If the value is -1, the OS default will be used." + type: "integer" + examples: + - 131072 + receive_buffer_bytes: + title: "Receive buffer bytes" + description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ + \ reading data. If the value is -1, the OS default will be used." + type: "integer" + examples: + - 32768 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-csv:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-csv" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "CSV Destination Spec" + type: "object" + required: + - "destination_path" + additionalProperties: false + properties: + destination_path: + description: "Path to the directory where csv files will be written. The\ + \ destination uses the local mount \"/local\" and any data files will\ + \ be placed inside that local mount. 
For more information check out our\ + \ docs" + type: "string" + examples: + - "/local" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-local-json:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-json" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Local Json Destination Spec" + type: "object" + required: + - "destination_path" + additionalProperties: false + properties: + destination_path: + description: "Path to the directory where json files will be written. The\ + \ files will be placed inside that local mount. For more information check\ + \ out our docs" + type: "string" + examples: + - "/json_data" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mssql:0.1.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MS SQL Server Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + - "schema" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 1433 + examples: + - "1433" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"public\"\ + ." + type: "string" + examples: + - "public" + default: "public" + order: 3 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 4 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 5 + ssl_method: + title: "SSL Method" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "ssl_method" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Encrypted (trust server certificate)" + additionalProperties: false + description: "Use the cert provided by the server without verification.\ + \ (For testing purposes only!)" + required: + - "ssl_method" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "encrypted_trust_server_certificate" + default: "encrypted_trust_server_certificate" + - title: "Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "ssl_method" + - "trustStoreName" + - "trustStorePassword" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + hostNameInCertificate: + title: "Host Name In Certificate" + type: "string" + description: "Specifies the host name of the server. 
The value of\ + \ this property must match the subject property of the certificate." + order: 7 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-meilisearch:0.2.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/meilisearch" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MeiliSearch Destination Spec" + type: "object" + required: + - "host" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the MeiliSearch instance" + type: "string" + order: 0 + api_key: + title: "API Key" + airbyte_secret: true + description: "MeiliSearch instance API Key" + type: "string" + order: 1 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mongodb:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mongodb" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MongoDB Destination Spec" + type: "object" + required: + - "database" + - "auth_type" + additionalProperties: true + properties: + instance_type: + description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ + \ Set TLS connection is used by default." + title: "MongoDb instance type" + type: "object" + order: 0 + oneOf: + - title: "Standalone MongoDb Instance" + required: + - "instance" + - "host" + - "port" + properties: + instance: + type: "string" + enum: + - "standalone" + default: "standalone" + host: + title: "Host" + type: "string" + description: "Host of a Mongo database to be replicated." + order: 0 + port: + title: "Port" + type: "integer" + description: "Port of a Mongo database to be replicated." + minimum: 0 + maximum: 65536 + default: 27017 + examples: + - "27017" + order: 1 + tls: + title: "TLS connection" + type: "boolean" + description: "Indicates whether TLS encryption protocol will be used\ + \ to connect to MongoDB. It is recommended to use TLS connection\ + \ if possible. For more information see documentation." + default: false + order: 2 + - title: "Replica Set" + required: + - "instance" + - "server_addresses" + properties: + instance: + type: "string" + enum: + - "replica" + default: "replica" + server_addresses: + title: "Server addresses" + type: "string" + description: "The members of a replica set. Please specify `host`:`port`\ + \ of each member seperated by comma." + examples: + - "host1:27017,host2:27017,host3:27017" + order: 0 + replica_set: + title: "Replica Set" + type: "string" + description: "A replica set name." + order: 1 + - title: "MongoDB Atlas" + additionalProperties: false + required: + - "instance" + - "cluster_url" + properties: + instance: + type: "string" + enum: + - "atlas" + default: "atlas" + cluster_url: + title: "Cluster URL" + type: "string" + description: "URL of a cluster to connect to." + order: 0 + database: + title: "DB Name" + description: "Name of the database." 
+ type: "string" + order: 2 + auth_type: + title: "Authorization type" + type: "object" + description: "Authorization type." + oneOf: + - title: "None" + additionalProperties: false + description: "None." + required: + - "authorization" + type: "object" + properties: + authorization: + type: "string" + const: "none" + - title: "Login/Password" + additionalProperties: false + description: "Login/Password." + required: + - "authorization" + - "username" + - "password" + type: "object" + properties: + authorization: + type: "string" + const: "login/password" + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 1 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 2 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mysql:0.1.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mysql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MySQL Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 3306 + examples: + - "3306" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + order: 5 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." 
+ type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-oracle:0.1.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/oracle" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Oracle Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "sid" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 1521 + examples: + - "1521" + order: 1 + sid: + title: "SID" + description: "SID" + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database. This user must have\ + \ CREATE USER privileges in the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"airbyte\"\ + . In Oracle, schemas and users are the same thing, so the \"user\" parameter\ + \ is used as the login credentials and this is used for the default Airbyte\ + \ message schema." + type: "string" + examples: + - "airbyte" + default: "airbyte" + order: 5 + encryption: + title: "Encryption" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Native Network Ecryption (NNE)" + additionalProperties: false + description: "Native network encryption gives you the ability to encrypt\ + \ database connections, without the configuration overhead of TCP/IP\ + \ and SSL/TLS and without the need to open and listen on different ports." 
+ required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "client_nne" + enum: + - "client_nne" + default: "client_nne" + encryption_algorithm: + type: "string" + description: "This parameter defines the encryption algorithm to be\ + \ used" + title: "Encryption Algorithm" + default: "AES256" + enum: + - "AES256" + - "RC4_56" + - "3DES168" + - title: "TLS Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "encryption_method" + - "ssl_certificate" + properties: + encryption_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + ssl_certificate: + title: "SSL PEM file" + description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ + \ containers frequently used in certificate installations" + type: "string" + airbyte_secret: true + multiline: true + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: false + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-postgres:0.3.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Postgres Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + - "schema" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"public\"\ + ." + type: "string" + examples: + - "public" + default: "public" + order: 3 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 4 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 5 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: false + order: 6 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." 
+ type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-redshift:0.3.19" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Redshift Destination Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "password" + - "schema" + additionalProperties: true + properties: + host: + description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ + \ region and end with .redshift.amazonaws.com)" + type: "string" + title: "Host" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5439 + examples: + - "5439" + title: "Port" + username: + description: "Username to use to access the database." + type: "string" + title: "Username" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + database: + description: "Name of the database." + type: "string" + title: "Database" + schema: + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. Unless specifically configured, the usual value\ + \ for this field is \"public\"." + type: "string" + examples: + - "public" + default: "public" + title: "Default Schema" + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the staging S3 bucket to use if utilising a COPY\ + \ strategy. COPY is recommended for production workloads for better speed\ + \ and scalability. See AWS docs for more details." + examples: + - "airbyte.staging" + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 staging bucket to use if utilising a\ + \ copy strategy." 
+ enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + access_key_id: + type: "string" + description: "The Access Key Id granting allow one to access the above S3\ + \ staging bucket. Airbyte requires Read and Write permissions to the given\ + \ bucket." + title: "S3 Key Id" + airbyte_secret: true + secret_access_key: + type: "string" + description: "The corresponding secret to the above access key id." + title: "S3 Access Key" + airbyte_secret: true + part_size: + type: "integer" + minimum: 10 + maximum: 100 + examples: + - "10" + description: "Optional. Increase this if syncing tables larger than 100GB.\ + \ Only relevant for COPY. Files are streamed to S3 in parts. This determines\ + \ the size of each part, in MBs. As S3 has a limit of 10,000 parts per\ + \ file, part size affects the table size. This is 10MB by default, resulting\ + \ in a default limit of 100GB tables. Note, a larger part size will result\ + \ in larger memory requirements. A rule of thumb is to multiply the part\ + \ size by 10 to get the memory requirement. Modify this with care." + title: "Stream Part Size" + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-s3:0.1.12" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "S3 Destination Spec" + type: "object" + required: + - "s3_bucket_name" + - "s3_bucket_path" + - "s3_bucket_region" + - "access_key_id" + - "secret_access_key" + - "format" + additionalProperties: false + properties: + s3_endpoint: + title: "Endpoint" + type: "string" + default: "" + description: "This is your S3 endpoint url.(if you are working with AWS\ + \ S3, just leave empty)." + examples: + - "http://localhost:9000" + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the S3 bucket." + examples: + - "airbyte_sync" + s3_bucket_path: + description: "Directory under the S3 bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 bucket." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + - "us-gov-east-1" + - "us-gov-west-1" + access_key_id: + type: "string" + description: "The access key id to access the S3 bucket. Airbyte requires\ + \ Read and Write permissions to the given bucket." + title: "S3 Key Id" + airbyte_secret: true + examples: + - "A012345678910EXAMPLE" + secret_access_key: + type: "string" + description: "The corresponding secret to the access key id." 
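> Editor's note (illustration, not part of the patch): the Redshift spec's `part_size` description gives a rule of thumb (S3 allows 10,000 parts per file, and memory use is roughly part size times 10). The small worked example below just does that arithmetic; the function name is illustrative.

```python
# Worked example of the part_size rule of thumb quoted in the Redshift spec above.
S3_MAX_PARTS = 10_000

def copy_staging_estimates(part_size_mb: int) -> tuple[int, int]:
    max_table_gb = part_size_mb * S3_MAX_PARTS // 1024  # largest single staged file
    approx_memory_mb = part_size_mb * 10                # rule-of-thumb memory requirement
    return max_table_gb, approx_memory_mb

print(copy_staging_estimates(10))  # default 10 MB parts -> (~97 GB table limit, ~100 MB memory)
```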
+ title: "S3 Access Key" + airbyte_secret: true + examples: + - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "Avro: Apache Avro" + required: + - "format_type" + - "compression_codec" + properties: + format_type: + type: "string" + enum: + - "Avro" + default: "Avro" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data. Default\ + \ to no compression." + type: "object" + oneOf: + - title: "no compression" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "no compression" + default: "no compression" + - title: "Deflate" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "Deflate" + default: "Deflate" + compression_level: + title: "Deflate level" + description: "0: no compression & fastest, 9: best compression\ + \ & slowest." + type: "integer" + default: 0 + minimum: 0 + maximum: 9 + - title: "bzip2" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "bzip2" + default: "bzip2" + - title: "xz" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "xz" + default: "xz" + compression_level: + title: "Compression level" + description: "See here for details." + type: "integer" + default: 6 + minimum: 0 + maximum: 9 + - title: "zstandard" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "zstandard" + default: "zstandard" + compression_level: + title: "Compression level" + description: "Negative levels are 'fast' modes akin to lz4 or\ + \ snappy, levels above 9 are generally for archival purposes,\ + \ and levels above 18 use a lot of memory." + type: "integer" + default: 3 + minimum: -5 + maximum: 22 + include_checksum: + title: "Include checksum" + description: "If true, include a checksum with each data block." + type: "boolean" + default: false + - title: "snappy" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "snappy" + default: "snappy" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + enum: + - "CSV" + default: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." + default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." 
+ type: "integer" + default: 5 + examples: + - 5 + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "JSONL" + default: "JSONL" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "Parquet: Columnar Storage" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "Parquet" + default: "Parquet" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data pages." + type: "string" + enum: + - "UNCOMPRESSED" + - "SNAPPY" + - "GZIP" + - "LZO" + - "BROTLI" + - "LZ4" + - "ZSTD" + default: "UNCOMPRESSED" + block_size_mb: + title: "Block Size (Row Group Size) (MB)" + description: "This is the size of a row group being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will improve\ + \ the IO when reading, but consume more memory when writing. Default:\ + \ 128 MB." + type: "integer" + default: 128 + examples: + - 128 + max_padding_size_mb: + title: "Max Padding Size (MB)" + description: "Maximum size allowed as padding to align row groups.\ + \ This is also the minimum size of a row group. Default: 8 MB." + type: "integer" + default: 8 + examples: + - 8 + page_size_kb: + title: "Page Size (KB)" + description: "The page size is for compression. A block is composed\ + \ of pages. A page is the smallest unit that must be read fully\ + \ to access a single record. If this value is too small, the compression\ + \ will deteriorate. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_page_size_kb: + title: "Dictionary Page Size (KB)" + description: "There is one dictionary page per column per row group\ + \ when dictionary encoding is used. The dictionary page size works\ + \ like the page size but for dictionary. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_encoding: + title: "Dictionary Encoding" + description: "Default: true." + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-snowflake:0.3.16" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snowflake Destination Spec" + type: "object" + required: + - "host" + - "role" + - "warehouse" + - "database" + - "schema" + - "username" + - "password" + additionalProperties: true + properties: + host: + description: "Host domain of the snowflake instance (must include the account,\ + \ region, cloud environment, and end with snowflakecomputing.com)." + examples: + - "accountname.us-east-2.aws.snowflakecomputing.com" + type: "string" + title: "Host" + order: 0 + role: + description: "The role you created for Airbyte to access Snowflake." + examples: + - "AIRBYTE_ROLE" + type: "string" + title: "Role" + order: 1 + warehouse: + description: "The warehouse you created for Airbyte to sync data into." 
+ examples: + - "AIRBYTE_WAREHOUSE" + type: "string" + title: "Warehouse" + order: 2 + database: + description: "The database you created for Airbyte to sync data into." + examples: + - "AIRBYTE_DATABASE" + type: "string" + title: "Database" + order: 3 + schema: + description: "The default Snowflake schema tables are written to if the\ + \ source does not specify a namespace." + examples: + - "AIRBYTE_SCHEMA" + type: "string" + title: "Default Schema" + order: 4 + username: + description: "The username you created to allow Airbyte to access the database." + examples: + - "AIRBYTE_USER" + type: "string" + title: "Username" + order: 5 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + order: 6 + loading_method: + type: "object" + title: "Loading Method" + description: "Loading method used to send data to Snowflake." + order: 7 + oneOf: + - title: "Standard Inserts" + additionalProperties: false + description: "Uses
INSERT
statements to send batches of records\ + \ to Snowflake. Easiest (no setup) but not recommended for large production\ + \ workloads due to slow speed." + required: + - "method" + properties: + method: + type: "string" + enum: + - "Standard" + default: "Standard" + - title: "AWS S3 Staging" + additionalProperties: false + description: "Writes large batches of records to a file, uploads the file\ + \ to S3, then uses
COPY INTO table
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "s3_bucket_name" + - "access_key_id" + - "secret_access_key" + properties: + method: + type: "string" + enum: + - "S3 Staging" + default: "S3 Staging" + order: 0 + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the staging S3 bucket. Airbyte will write\ + \ files to this bucket and read them via
COPY
statements\ + \ on Snowflake." + examples: + - "airbyte.staging" + order: 1 + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 staging bucket to use if utilising\ + \ a copy strategy." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "eu-south-1" + - "eu-north-1" + - "sa-east-1" + - "me-south-1" + order: 2 + access_key_id: + type: "string" + description: "The Access Key Id granting allow one to access the above\ + \ S3 staging bucket. Airbyte requires Read and Write permissions\ + \ to the given bucket." + title: "S3 Key Id" + airbyte_secret: true + order: 3 + secret_access_key: + type: "string" + description: "The corresponding secret to the above access key id." + title: "S3 Access Key" + airbyte_secret: true + order: 4 + - title: "GCS Staging" + additionalProperties: false + description: "Writes large batches of records to a file, uploads the file\ + \ to GCS, then uses
COPY INTO table
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "project_id" + - "bucket_name" + - "credentials_json" + properties: + method: + type: "string" + enum: + - "GCS Staging" + default: "GCS Staging" + order: 0 + project_id: + title: "GCP Project ID" + type: "string" + description: "The name of the GCP project ID for your credentials." + examples: + - "my-project" + order: 1 + bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the staging GCS bucket. Airbyte will write\ + \ files to this bucket and read them via
COPY
statements\ + \ on Snowflake." + examples: + - "airbyte-staging" + order: 2 + credentials_json: + title: "Google Application Credentials" + type: "string" + description: "The contents of the JSON key file that has read/write\ + \ permissions to the staging GCS bucket. You will separately need\ + \ to grant bucket access to your Snowflake GCP service account.\ + \ See the GCP docs for more information on how to generate a JSON key\ + \ for your service account." + airbyte_secret: true + multiline: true + order: 3 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml new file mode 100644 index 0000000000000..71e4a45e69158 --- /dev/null +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -0,0 +1,5836 @@ +# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. +# Do NOT edit this file directly. See generator class for more details. +--- +- dockerImage: "airbyte/source-aws-cloudtrail:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/aws-cloudtrail" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Aws CloudTrail Spec" + type: "object" + required: + - "aws_key_id" + - "aws_secret_key" + - "aws_region_name" + - "start_date" + additionalProperties: true + properties: + aws_key_id: + type: "string" + description: "Specifies an AWS access key associated with an IAM user or\ + \ role." + airbyte_secret: true + aws_secret_key: + type: "string" + description: "Specifies the secret key associated with the access key. This\ + \ is essentially the 'password' for the access key." + airbyte_secret: true + aws_region_name: + type: "string" + description: "The default AWS Region to use, for example, us-west-1 or us-west-2.\ + \ When specifying a Region inline during client initialization, this property\ + \ is named region_name." + start_date: + type: "string" + description: "The date you would like to replicate data. Data in ClouTraid\ + \ is available for last 90 days only. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amazon-ads:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-ads" + connectionSpecification: + title: "Amazon Ads Spec" + type: "object" + properties: + client_id: + title: "Client Id" + description: "Oauth client id How to create your Login with Amazon" + name: "Client ID" + type: "string" + client_secret: + title: "Client Secret" + description: "Oauth client secret How to create your Login with Amazon" + name: "Client secret" + airbyte_secret: true + type: "string" + scope: + title: "Scope" + description: "By default its advertising::campaign_management, but customers\ + \ may need to set scope to cpc_advertising:campaign_management." 
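> Editor's note (illustration, not part of the patch): the AWS CloudTrail spec above constrains `start_date` with the regex `^[0-9]{4}-[0-9]{2}-[0-9]{2}$` and notes that CloudTrail only retains about 90 days of events. A config check along those lines might look like the sketch below; the function name is illustrative, not the connector's code.

```python
# Sketch: validate a CloudTrail-style start_date and warn about the ~90-day retention window.
import re
from datetime import date, timedelta

START_DATE_PATTERN = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$")

def check_start_date(value: str) -> None:
    if not START_DATE_PATTERN.match(value):
        raise ValueError("start_date must be in YYYY-MM-DD format")
    if date.fromisoformat(value) < date.today() - timedelta(days=90):
        print("warning: CloudTrail keeps ~90 days of events; earlier data will not be replicated")

check_start_date("2021-01-01")
```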
+ default: "advertising::campaign_management" + name: "Client scope" + examples: + - "cpc_advertising:campaign_management" + type: "string" + refresh_token: + title: "Refresh Token" + description: "Oauth 2.0 refresh_token, read details here" + name: "Oauth refresh token" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "Start date for collectiong reports, should not be more than\ + \ 60 days in past. In YYYY-MM-DD format" + name: "Start date" + examples: + - "2022-10-10" + - "2022-10-22" + type: "string" + region: + description: "Region to pull data from (EU/NA/FE/SANDBOX)" + default: "NA" + name: "Region" + title: "AmazonAdsRegion" + enum: + - "NA" + - "EU" + - "FE" + - "SANDBOX" + type: "string" + profiles: + title: "Profiles" + description: "profile Ids you want to fetch data for" + name: "Profile Ids" + type: "array" + items: + type: "integer" + required: + - "client_id" + - "client_secret" + - "refresh_token" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" + changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" + connectionSpecification: + title: "Amazon Seller Partner Spec" + type: "object" + properties: + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + refresh_token: + title: "Refresh Token" + description: "The refresh token used obtained via authorization (can be\ + \ passed to the client instead)" + airbyte_secret: true + type: "string" + lwa_app_id: + title: "Lwa App Id" + description: "Your login with amazon app id" + airbyte_secret: true + type: "string" + lwa_client_secret: + title: "Lwa Client Secret" + description: "Your login with amazon client secret" + airbyte_secret: true + type: "string" + aws_access_key: + title: "Aws Access Key" + description: "AWS user access key" + airbyte_secret: true + type: "string" + aws_secret_key: + title: "Aws Secret Key" + description: "AWS user secret key" + airbyte_secret: true + type: "string" + role_arn: + title: "Role Arn" + description: "The role's arn (needs permission to 'Assume Role' STS)" + airbyte_secret: true + type: "string" + aws_environment: + title: "AWSEnvironment" + description: "An enumeration." + enum: + - "PRODUCTION" + - "SANDBOX" + type: "string" + region: + title: "AWSRegion" + description: "An enumeration." + enum: + - "AE" + - "DE" + - "PL" + - "EG" + - "ES" + - "FR" + - "IN" + - "IT" + - "NL" + - "SA" + - "SE" + - "TR" + - "UK" + - "AU" + - "JP" + - "SG" + - "US" + - "BR" + - "CA" + - "MX" + - "GB" + type: "string" + required: + - "replication_start_date" + - "refresh_token" + - "lwa_app_id" + - "lwa_client_secret" + - "aws_access_key" + - "aws_secret_key" + - "role_arn" + - "aws_environment" + - "region" + definitions: + AWSEnvironment: + title: "AWSEnvironment" + description: "An enumeration." + enum: + - "PRODUCTION" + - "SANDBOX" + type: "string" + AWSRegion: + title: "AWSRegion" + description: "An enumeration." 
+ enum: + - "AE" + - "DE" + - "PL" + - "EG" + - "ES" + - "FR" + - "IN" + - "IT" + - "NL" + - "SA" + - "SE" + - "TR" + - "UK" + - "AU" + - "JP" + - "SG" + - "US" + - "BR" + - "CA" + - "MX" + - "GB" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amplitude:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amplitude" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Amplitude Spec" + type: "object" + required: + - "api_key" + - "secret_key" + - "start_date" + additionalProperties: false + properties: + api_key: + type: "string" + description: "This is the project’s API key, used for calling Amplitude’\ + s APIs" + airbyte_secret: true + secret_key: + type: "string" + description: "This is the project's secret key, which is also used for calling\ + \ Amplitude’s APIs" + airbyte_secret: true + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-01-25T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-apify-dataset:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/apify-dataset" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Apify Dataset Spec" + type: "object" + required: + - "datasetId" + additionalProperties: false + properties: + datasetId: + type: "string" + description: "ID of the dataset you would like to load to Airbyte." + clean: + type: "boolean" + description: "If set to true, only clean items will be downloaded from the\ + \ dataset. See description of what clean means in Apify API docs. If not sure, set clean to false." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-appstore-singer:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/appstore" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Appstore Singer Spec" + type: "object" + required: + - "key_id" + - "private_key" + - "issuer_id" + - "vendor" + - "start_date" + additionalProperties: false + properties: + key_id: + type: "string" + description: "Key_id is the API key you use to connect to appstore's API." + private_key: + type: "string" + description: "Private_key is the contents of the key file you use to connect to appstore's API." + airbyte_secret: true + multiline: true + issuer_id: + type: "string" + description: "Issuer_id is used to generate the credentials to connect to appstore's\ + \ API." + vendor: + type: "string" + description: "This is the Apple ID of your account." + start_date: + type: "string" + description: "Date from which to start pulling data." 
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-asana:0.1.3" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Asana Spec" + type: "object" + additionalProperties: true + properties: + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate to Github" + type: "object" + oneOf: + - type: "object" + title: "Authenticate with Personal Access Token" + required: + - "personal_access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "PAT Credentials" + const: "PAT Credentials" + personal_access_token: + type: "string" + title: "Personal Access Token" + description: "Asana Personal Access Token (generate yours here)." + airbyte_secret: true + - type: "object" + title: "Authenticate via Asana (Oauth)" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "OAuth Credentials" + const: "OAuth Credentials" + client_id: + type: "string" + title: "" + description: "" + airbyte_secret: true + client_secret: + type: "string" + title: "" + description: "" + airbyte_secret: true + refresh_token: + type: "string" + title: "" + description: "" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "1" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-bamboo-hr:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bamboo-hr" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Bamboo HR Spec" + type: "object" + required: + - "subdomain" + - "api_key" + additionalProperties: false + properties: + subdomain: + type: "string" + description: "Sub Domain of bamboo hr" + api_key: + type: "string" + description: "Api key of bamboo hr" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-bigcommerce:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bigcommerce" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigCommerce Source CDK Specifications" + type: "object" + required: + - "start_date" + - "store_hash" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + store_hash: + type: "string" + description: "The hash code of the store. For https://api.bigcommerce.com/stores/HASH_CODE/v3/,\ + \ The store's hash code is 'HASH_CODE'." + access_token: + type: "string" + description: "The API Access Token." 
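Editor's note: the Asana spec above models authentication as a `credentials` oneOf discriminated by a constant `option_title`. A minimal sketch of the two accepted shapes follows; all token and secret values are placeholders.

    # Sketch of the two credential shapes accepted by the Asana spec's oneOf above.
    pat_credentials = {
        "option_title": "PAT Credentials",
        "personal_access_token": "placeholder-pat",  # placeholder token
    }

    oauth_credentials = {
        "option_title": "OAuth Credentials",
        "client_id": "placeholder-client-id",
        "client_secret": "placeholder-client-secret",
        "refresh_token": "placeholder-refresh-token",
    }

    def credential_variant(credentials: dict) -> str:
        # Mirrors how the oneOf is told apart: only the PAT branch carries this key.
        return "PAT" if "personal_access_token" in credentials else "OAuth"

    print(credential_variant(pat_credentials), credential_variant(oauth_credentials))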
+ airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-bigquery:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/source/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Source Spec" + type: "object" + required: + - "project_id" + - "credentials_json" + additionalProperties: false + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "The BigQuery Dataset ID to look for tables to replicate from." + title: "Default Dataset ID" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs\ + \ if you need help generating this key." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: [] + supported_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-bing-ads:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bing-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Bing Ads Spec" + type: "object" + required: + - "accounts" + - "client_id" + - "client_secret" + - "customer_id" + - "developer_token" + - "refresh_token" + - "user_id" + - "reports_start_date" + - "hourly_reports" + - "daily_reports" + - "weekly_reports" + - "monthly_reports" + additionalProperties: false + properties: + accounts: + title: "Accounts" + type: "object" + description: "Account selection strategy." + oneOf: + - title: "All accounts assigned to your user" + additionalProperties: false + description: "Fetch data for all available accounts." + required: + - "selection_strategy" + properties: + selection_strategy: + type: "string" + enum: + - "all" + const: "all" + - title: "Subset of your accounts" + additionalProperties: false + description: "Fetch data for subset of account ids." + required: + - "ids" + - "selection_strategy" + properties: + selection_strategy: + type: "string" + enum: + - "subset" + const: "subset" + ids: + type: "array" + description: "List of accounts from which data will be fetched." + items: + type: "string" + minItems: 1 + uniqueItems: true + client_id: + type: "string" + description: "ID of your Microsoft Advertising client application." + airbyte_secret: true + client_secret: + type: "string" + description: "Secret of your Microsoft Advertising client application." + airbyte_secret: true + customer_id: + type: "string" + description: "User's customer ID." + developer_token: + type: "string" + description: "Developer token associated with user." + airbyte_secret: true + refresh_token: + type: "string" + description: "The long-lived Refresh token received via grant_type=refresh_token\ + \ request." + airbyte_secret: true + user_id: + type: "string" + description: "Unique user identifier." + reports_start_date: + type: "string" + format: "date" + default: "2020-01-01" + description: "From which date perform initial sync for report related streams.\ + \ In YYYY-MM-DD format" + hourly_reports: + title: "Hourly reports" + type: "boolean" + description: "The report data will be aggregated by each hour of the day." 
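Editor's note: the Bing Ads spec above expresses account selection as a oneOf: either every account visible to the user, or an explicit subset of ids. Both shapes are sketched below with placeholder account ids.

    # The two valid shapes of the Bing Ads `accounts` object, per the spec above.
    all_accounts = {"selection_strategy": "all"}

    subset_of_accounts = {
        "selection_strategy": "subset",
        "ids": ["123456789", "987654321"],  # placeholder ids; the spec requires at least one
    }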
+ default: false + daily_reports: + title: "Daily reports" + type: "boolean" + description: "The report data will be aggregated by each day." + default: false + weekly_reports: + title: "Weekly reports" + type: "boolean" + description: "The report data will be aggregated by each week running from\ + \ Sunday through Saturday." + default: false + monthly_reports: + title: "Monthly reports" + type: "boolean" + description: "The report data will be aggregated by each month." + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-braintree:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/braintree" + connectionSpecification: + title: "Braintree Spec" + type: "object" + properties: + merchant_id: + title: "Merchant Id" + description: "Merchant ID is the unique identifier for entire gateway account." + name: "Merchant ID" + type: "string" + public_key: + title: "Public Key" + description: "This is your user-specific public identifier for Braintree." + name: "Public key" + type: "string" + private_key: + title: "Private Key" + description: "This is your user-specific private identifier." + name: "Private Key" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for Braintree\ + \ API for UTC timezone, All data generated after this date will be replicated." + name: "Start date" + examples: + - "2020" + - "2020-12-30" + - "2020-11-22 20:20:05" + type: "string" + format: "date-time" + environment: + description: "Environment specifies where the data will come from." + name: "Environment" + examples: + - "sandbox" + - "production" + - "qa" + - "development" + allOf: + - $ref: "#/definitions/Environment" + required: + - "merchant_id" + - "public_key" + - "private_key" + - "environment" + definitions: + Environment: + title: "Environment" + description: "An enumeration." + enum: + - "Development" + - "Sandbox" + - "Qa" + - "Production" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-cart:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/cart" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cart Spec" + type: "object" + required: + - "access_token" + - "start_date" + - "store_name" + additionalProperties: true + properties: + access_token: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." + store_name: + type: "string" + description: "Store name. All API URLs start with https://[mystorename.com]/api/v1/,\ + \ where [mystorename.com] is the domain name of your store." 
+ start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-01-01T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-chargebee:0.1.4" + spec: + documentationUrl: "https://apidocs.chargebee.com/docs/api" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Chargebee Spec" + type: "object" + required: + - "site" + - "site_api_key" + - "start_date" + - "product_catalog" + additionalProperties: false + properties: + site: + type: "string" + title: "Site" + description: "The site prefix for your Chargebee instance." + examples: + - "airbyte-test" + site_api_key: + type: "string" + title: "API Key" + description: "The API key from your Chargebee instance." + examples: + - "test_3yzfanAXF66USdWC9wQcM555DQJkSYoppu" + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-01-25T00:00:00Z" + product_catalog: + title: "Product Catalog" + type: "string" + description: "Product Catalog version of your Chargebee site. Instructions\ + \ on how to find your version you may find here under `API Version` section." + enum: + - "1.0" + - "2.0" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-clickhouse:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ClickHouse Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + description: "Host Endpoint of the Clickhouse Cluster" + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + database: + description: "Name of the database." + type: "string" + examples: + - "default" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." 
+ type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-close-com:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/close-com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Close.com Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Close.com API key (usually starts with 'api_'; find yours\ + \ here)." + airbyte_secret: true + start_date: + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-cockroachdb:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cockroach Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." 
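Editor's note: the ClickHouse source spec above offers three mutually exclusive `tunnel_method` shapes. The sketches below use placeholder hosts and credentials; field names come from the spec above.

    # The three tunnel_method variants from the ClickHouse spec above.
    no_tunnel = {"tunnel_method": "NO_TUNNEL"}

    ssh_key_auth = {
        "tunnel_method": "SSH_KEY_AUTH",
        "tunnel_host": "jump.example.com",   # placeholder jump server
        "tunnel_port": 22,
        "tunnel_user": "airbyte",            # placeholder OS user
        "ssh_key": "<private key in RSA PEM format>",  # placeholder
    }

    ssh_password_auth = {
        "tunnel_method": "SSH_PASSWORD_AUTH",
        "tunnel_host": "jump.example.com",
        "tunnel_port": 22,
        "tunnel_user": "airbyte",
        "tunnel_user_password": "placeholder-password",
    }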
+ type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "Connect using SSL" + description: "Encrypt client/server communications for increased security." + type: "boolean" + default: false + order: 5 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-dixa:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/dixa" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Dixa Spec" + type: "object" + required: + - "api_token" + - "start_date" + additionalProperties: false + properties: + api_token: + type: "string" + description: "Dixa API token" + airbyte_secret: true + start_date: + type: "string" + description: "The connector pulls records updated from this date onwards." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "YYYY-MM-DD" + batch_size: + type: "integer" + description: "Number of days to batch into one request. Max 31." + pattern: "^[0-9]{1,2}$" + examples: + - 1 + - 31 + default: 31 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-drift:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/drift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Drift Spec" + type: "object" + required: + - "access_token" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Drift Access Token. See the docs for more information on how to generate this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-exchange-rates:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/exchangeratesapi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ratesapi.io Source Spec" + type: "object" + required: + - "start_date" + - "access_key" + additionalProperties: false + properties: + start_date: + type: "string" + description: "Start getting data from that date." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "YYYY-MM-DD" + access_key: + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." + airbyte_secret: true + base: + type: "string" + description: "ISO reference currency. See here. Free plan doesn't support Source Currency Switching, default\ + \ base currency is EUR" + examples: + - "EUR" + - "USD" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-facebook-marketing:0.2.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" + changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" + connectionSpecification: + title: "Source Facebook Marketing" + type: "object" + properties: + account_id: + title: "Account Id" + description: "The Facebook Ad account ID to use when pulling data from the\ + \ Facebook Marketing API." + type: "string" + access_token: + title: "Access Token" + description: "The value of the access token generated. See the docs\ + \ for more information" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for AdCreatives\ + \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. 
All data generated\ + \ after this date will be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + format: "date-time" + end_date: + title: "End Date" + description: "The date until which you'd like to replicate data for AdCreatives\ + \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ between start_date and this date will be replicated. Not setting this\ + \ option will result in always syncing the latest data." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-26T00:00:00Z" + type: "string" + format: "date-time" + include_deleted: + title: "Include Deleted" + description: "Include data from deleted campaigns, ads, and adsets." + default: false + type: "boolean" + insights_lookback_window: + title: "Insights Lookback Window" + description: "The attribution window for the actions" + default: 28 + minimum: 0 + maximum: 28 + type: "integer" + insights_days_per_job: + title: "Insights Days Per Job" + description: "Number of days to sync in one job. The more data you have\ + \ - the smaller you want this parameter to be." + default: 7 + minimum: 1 + maximum: 30 + type: "integer" + custom_insights: + title: "Custom Insights" + description: "A list wich contains insights entries, each entry must have\ + \ a name and can contains fields, breakdowns or action_breakdowns)" + type: "array" + items: + title: "InsightConfig" + type: "object" + properties: + name: + title: "Name" + description: "The name value of insight" + type: "string" + fields: + title: "Fields" + description: "A list of chosen fields for fields parameter" + default: [] + type: "array" + items: + type: "string" + breakdowns: + title: "Breakdowns" + description: "A list of chosen breakdowns for breakdowns" + default: [] + type: "array" + items: + type: "string" + action_breakdowns: + title: "Action Breakdowns" + description: "A list of chosen action_breakdowns for action_breakdowns" + default: [] + type: "array" + items: + type: "string" + required: + - "name" + required: + - "account_id" + - "access_token" + - "start_date" + definitions: + InsightConfig: + title: "InsightConfig" + type: "object" + properties: + name: + title: "Name" + description: "The name value of insight" + type: "string" + fields: + title: "Fields" + description: "A list of chosen fields for fields parameter" + default: [] + type: "array" + items: + type: "string" + breakdowns: + title: "Breakdowns" + description: "A list of chosen breakdowns for breakdowns" + default: [] + type: "array" + items: + type: "string" + action_breakdowns: + title: "Action Breakdowns" + description: "A list of chosen action_breakdowns for action_breakdowns" + default: [] + type: "array" + items: + type: "string" + required: + - "name" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-facebook-pages:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-pages" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Facebook Pages Spec" + type: "object" + required: + - "access_token" + - "page_id" + additionalProperties: false + properties: + access_token: + type: "string" + 
description: "Facebook Page Access Token" + airbyte_secret: true + page_id: + type: "string" + description: "Page ID" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-file:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/file" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "File Source Spec" + type: "object" + additionalProperties: false + required: + - "dataset_name" + - "format" + - "url" + - "provider" + properties: + dataset_name: + type: "string" + description: "Name of the final table where to replicate this file (should\ + \ include only letters, numbers dash and underscores)" + format: + type: "string" + enum: + - "csv" + - "json" + - "jsonl" + - "excel" + - "feather" + - "parquet" + default: "csv" + description: "File Format of the file to be replicated (Warning: some format\ + \ may be experimental, please refer to docs)." + reader_options: + type: "string" + description: "This should be a valid JSON string used by each reader/parser\ + \ to provide additional options and tune its behavior" + examples: + - "{}" + - "{'sep': ' '}" + url: + type: "string" + description: "URL path to access the file to be replicated" + provider: + type: "object" + description: "Storage Provider or Location of the file(s) to be replicated." + default: "Public Web" + oneOf: + - title: "HTTPS: Public Web" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "HTTPS" + default: "HTTPS" + - title: "GCS: Google Cloud Storage" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "GCS" + default: "GCS" + service_account_json: + type: "string" + description: "In order to access private Buckets stored on Google\ + \ Cloud, this connector would need a service account json credentials\ + \ with the proper permissions as described here. Please generate the credentials.json\ + \ file and copy/paste its content to this field (expecting JSON\ + \ formats). If accessing publicly available data, this field is\ + \ not necessary." + - title: "S3: Amazon Web Services" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "S3" + default: "S3" + aws_access_key_id: + type: "string" + description: "In order to access private Buckets stored on AWS S3,\ + \ this connector would need credentials with the proper permissions.\ + \ If accessing publicly available data, this field is not necessary." + aws_secret_access_key: + type: "string" + description: "In order to access private Buckets stored on AWS S3,\ + \ this connector would need credentials with the proper permissions.\ + \ If accessing publicly available data, this field is not necessary." + airbyte_secret: true + - title: "AzBlob: Azure Blob Storage" + required: + - "storage" + - "storage_account" + properties: + storage: + type: "string" + enum: + - "AzBlob" + default: "AzBlob" + storage_account: + type: "string" + description: "The globally unique name of the storage account that\ + \ the desired blob sits within. See here for more details." + sas_token: + type: "string" + description: "To access Azure Blob Storage, this connector would need\ + \ credentials with the proper permissions. One option is a SAS (Shared\ + \ Access Signature) token. 
If accessing publicly available data,\ + \ this field is not necessary." + airbyte_secret: true + shared_key: + type: "string" + description: "To access Azure Blob Storage, this connector would need\ + \ credentials with the proper permissions. One option is a storage\ + \ account shared key (aka account key or access key). If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + - title: "SSH: Secure Shell" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SSH" + default: "SSH" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "SCP: Secure copy protocol" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SCP" + default: "SCP" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "SFTP: Secure File Transfer Protocol" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SFTP" + default: "SFTP" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "Local Filesystem (limited)" + required: + - "storage" + properties: + storage: + type: "string" + description: "WARNING: Note that local storage URL available for read\ + \ must start with the local mount \"/local/\" at the moment until\ + \ we implement more advanced docker mounting options..." + enum: + - "local" + default: "local" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshdesk:0.2.7" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/freshdesk" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshdesk Spec" + type: "object" + required: + - "domain" + - "api_key" + additionalProperties: false + properties: + domain: + type: "string" + description: "Freshdesk domain" + examples: + - "myaccount.freshdesk.com" + pattern: + - "^[a-zA-Z0-9._-]*\\.freshdesk\\.com$" + api_key: + type: "string" + description: "Freshdesk API Key. See the docs for more information on how to obtain this key." + airbyte_secret: true + requests_per_minute: + title: "Requests per minute" + type: "integer" + description: "Number of requests per minute that this source allowed to\ + \ use." + start_date: + title: "Start date" + description: "Date from which to start pulling data." + format: "date-time" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2020-12-01T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshservice:0.1.0" + spec: + documentationUrl: "https://hub.docker.com/r/airbyte/source-freshservice" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshservice Spec" + type: "object" + required: + - "domain_name" + - "api_key" + - "start_date" + additionalProperties: false + properties: + domain_name: + type: "string" + description: "Freshservice domain" + examples: + - "mydomain.freshservice.com" + api_key: + title: "Api Key" + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." 
+ airbyte_secret: true + start_date: + title: "Replication Start Date" + type: "string" + description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2020-10-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-github:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/github" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Github Source Spec" + type: "object" + required: + - "start_date" + - "repository" + additionalProperties: true + properties: + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate to Github" + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Github (Oauth)" + required: + - "access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "OAuth Credentials" + const: "OAuth Credentials" + access_token: + type: "string" + title: "Access Token" + description: "Oauth access token" + airbyte_secret: true + - type: "object" + title: "Authenticate with Personal Access Token" + required: + - "personal_access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "PAT Credentials" + const: "PAT Credentials" + personal_access_token: + type: "string" + title: "Personal Access Tokens" + description: "Log into Github and then generate a personal access token. To load balance your API quota consumption\ + \ across multiple API tokens, input multiple tokens separated with\ + \ \",\"" + airbyte_secret: true + repository: + type: "string" + examples: + - "airbytehq/airbyte" + - "airbytehq/*" + title: "Github repositories" + description: "Space-delimited list of GitHub repositories/organizations,\ + \ e.g. `airbytehq/airbyte` for single repository and `airbytehq/*` for\ + \ get all repositories from organization" + start_date: + type: "string" + title: "Start date" + description: "The date from which you'd like to replicate data for GitHub\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated. Note that it will be used only in the following\ + \ incremental streams: comments, commits and issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + branch: + type: "string" + title: "Branch" + examples: + - "airbytehq/airbyte/master" + description: "Space-delimited list of GitHub repository branches to pull\ + \ commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified\ + \ for a repository, the default branch will be pulled." 
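Editor's note: a config sketch that satisfies the GitHub source spec above, using the personal-access-token branch of the credentials oneOf. Token values are placeholders; repository, start_date, and branch reuse the spec's own examples.

    # Hypothetical GitHub source config built from the spec above.
    github_config = {
        "credentials": {
            "option_title": "PAT Credentials",
            # Multiple tokens may be supplied comma-separated to spread API quota.
            "personal_access_token": "token-a,token-b",  # placeholders
        },
        # Space-delimited repositories/organizations, per the spec's examples.
        "repository": "airbytehq/airbyte airbytehq/*",
        "start_date": "2021-03-01T00:00:00Z",
        "branch": "airbytehq/airbyte/master",
    }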
+ supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-gitlab:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/gitlab" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Gitlab Singer Spec" + type: "object" + required: + - "api_url" + - "private_token" + - "start_date" + additionalProperties: false + properties: + api_url: + type: "string" + examples: + - "gitlab.com" + description: "Please enter your basic URL from Gitlab instance" + private_token: + type: "string" + description: "Log into your Gitlab account and then generate a personal\ + \ Access Token." + airbyte_secret: true + groups: + type: "string" + examples: + - "airbyte.io" + description: "Space-delimited list of groups. e.g. airbyte.io" + projects: + type: "string" + examples: + - "airbyte.io/documentation" + description: "Space-delimited list of projects. e.g. airbyte.io/documentation\ + \ meltano/tap-gitlab" + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Gitlab\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-google-ads:0.1.15" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Ads Spec" + type: "object" + required: + - "credentials" + - "start_date" + - "customer_id" + additionalProperties: true + properties: + credentials: + type: "object" + title: "Google Credentials" + required: + - "developer_token" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + developer_token: + type: "string" + title: "Developer Token" + description: "Developer token granted by Google to use their APIs. More\ + \ instruction on how to find this value in our docs" + airbyte_secret: true + client_id: + type: "string" + title: "Client Id" + description: "Google client id. More instruction on how to find this\ + \ value in our docs" + client_secret: + type: "string" + title: "Client Secret" + description: "Google client secret. More instruction on how to find\ + \ this value in our docs" + airbyte_secret: true + access_token: + type: "string" + title: "Access Token" + description: "Access token generated using developer_token, oauth_client_id,\ + \ and oauth_client_secret. More instruction on how to find this value\ + \ in our docs" + airbyte_secret: true + refresh_token: + type: "string" + title: "Refresh Token" + description: "Refresh token generated using developer_token, oauth_client_id,\ + \ and oauth_client_secret. More instruction on how to find this value\ + \ in our docs" + airbyte_secret: true + customer_id: + title: "Customer Id" + type: "string" + description: "Customer id must be specified as a 10-digit number without\ + \ dashes. 
More instruction on how to find this value in our docs" + login_customer_id: + type: "string" + title: "Login Customer ID" + description: "If your access to the customer account is through a manager\ + \ account, this field is required and must be set to the customer ID of\ + \ the manager account (10-digit number without dashes). More information\ + \ about this field you can see here" + start_date: + type: "string" + title: "Start Date" + description: "UTC date and time in the format 2017-01-25. Any data before\ + \ this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "2017-01-25" + conversion_window_days: + title: "Conversion Window" + type: "integer" + description: "Define the historical replication lookback window in days" + minimum: 0 + maximum: 1095 + default: 14 + examples: + - 14 + custom_queries: + type: "array" + title: "Custom GAQL Queries" + items: + type: "object" + properties: + query: + type: "string" + title: "Custom query" + description: "A custom defined GAQL query for building the report.\ + \ Should not contain segments.date expression as it used by incremental\ + \ streams" + examples: + - "SELECT segments.ad_destination_type, campaign.advertising_channel_sub_type\ + \ FROM campaign WHERE campaign.status = 'PAUSED'" + table_name: + type: "string" + title: "Destination table name" + description: "The table name in your destination database for choosen\ + \ query." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "developer_token" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-analytics-v4:0.1.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Analytics V4 Spec" + type: "object" + required: + - "view_id" + - "start_date" + additionalProperties: true + properties: + view_id: + type: "string" + title: "View ID" + description: "The ID for the Google Analytics View you want to fetch data\ + \ from. This can be found from the Google Analytics Account Explorer." + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + description: "A date in the format YYYY-MM-DD." + examples: + - "2020-06-01" + window_in_days: + type: "integer" + description: "The amount of days for each data-chunk begining from start_date.\ + \ Bigger the value - faster the fetch. (Min=1, as for a Day; Max=364,\ + \ as for a Year)." + examples: + - 30 + - 60 + - 90 + - 120 + - 200 + - 364 + default: 90 + custom_reports: + title: "Custom Reports" + type: "string" + description: "A JSON array describing the custom reports you want to sync\ + \ from GA. Check out the docs to get more information about this field." 
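Editor's note: the Google Ads spec above lets users add extra report streams through `custom_queries`, where each entry pairs a GAQL query (which must not contain segments.date) with a destination table name. The query below is the spec's own example; the table name is a placeholder.

    # One custom_queries entry as described in the Google Ads spec above.
    custom_queries = [
        {
            "query": (
                "SELECT segments.ad_destination_type, "
                "campaign.advertising_channel_sub_type "
                "FROM campaign WHERE campaign.status = 'PAUSED'"
            ),
            "table_name": "paused_campaigns",  # placeholder destination table name
        }
    ]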
+ credentials: + type: "object" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "A access token generated using the above client ID,\ + \ secret and refresh_token" + airbyte_secret: true + - type: "object" + title: "Service Account Key Authentication" + required: + - "credentials_json" + properties: + auth_type: + type: "string" + const: "Service" + enum: + - "Service" + default: "Service" + order: 0 + credentials_json: + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-directory:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-directory" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Directory Spec" + type: "object" + required: + - "credentials_json" + - "email" + additionalProperties: false + properties: + credentials_json: + type: "string" + description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + description: "The email of the user, which has permissions to access the\ + \ Google Workspace Admin APIs." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-google-search-console:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Search Console Spec" + type: "object" + additionalProperties: false + required: + - "site_urls" + - "start_date" + - "authorization" + properties: + site_urls: + type: "array" + items: + type: "string" + description: "Website URLs property; do not include the domain-level property\ + \ in the list" + examples: + - "https://example1.com" + - "https://example2.com" + start_date: + type: "string" + description: "The date from which you'd like to replicate data in the format\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + end_date: + type: "string" + description: "The date from which you'd like to replicate data in the format\ + \ YYYY-MM-DD. 
Must be greater or equal start_date field" + examples: + - "2021-12-12" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + authorization: + type: "object" + title: "Authentication Type" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "Service Account Key Authentication" + required: + - "auth_type" + - "service_account_info" + - "email" + properties: + auth_type: + type: "string" + const: "Service" + enum: + - "Service" + default: "Service" + order: 0 + service_account_info: + title: "Service Account JSON Key" + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + email: + title: "Admin Email" + type: "string" + description: "The email of the user which has permissions to access\ + \ the Google Workspace Admin APIs." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "authorization" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-sheets:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-sheets" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Stripe Source Spec" + type: "object" + required: + - "spreadsheet_id" + additionalProperties: true + properties: + spreadsheet_id: + type: "string" + description: "The ID of the spreadsheet to be replicated." 
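Editor's note: a config sketch for the Google Search Console spec above, using the service-account branch of the authorization oneOf. The key JSON and admin email are placeholders; site_urls and start_date follow the spec's example formats.

    # Hypothetical Search Console config using service-account authorization.
    search_console_config = {
        "site_urls": ["https://example1.com", "https://example2.com"],
        "start_date": "2021-01-01",
        "authorization": {
            "auth_type": "Service",
            # Truncated placeholder for the service account key JSON.
            "service_account_info": '{ "type": "service_account", "project_id": "my-project" }',
            "email": "admin@example.com",  # placeholder Workspace admin email
        },
    }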
+ credentials: + type: "object" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - title: "Service Account Key Authentication" + type: "object" + required: + - "auth_type" + - "service_account_info" + properties: + auth_type: + type: "string" + const: "Service" + service_account_info: + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - 0 + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-google-workspace-admin-reports:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-workspace-admin-reports" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Directory Spec" + type: "object" + required: + - "credentials_json" + - "email" + additionalProperties: false + properties: + credentials_json: + type: "string" + description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + description: "The email of the user, which has permissions to access the\ + \ Google Workspace Admin APIs." + lookback: + type: "integer" + minimum: 0 + maximum: 180 + description: "Sets the range of time shown in the report. Reports API allows\ + \ from up to 180 days ago. " + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-greenhouse:0.2.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/greenhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Greenhouse Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Greenhouse API Key. See the docs for more information on how to generate this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-harvest:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/harvest" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Harvest Spec" + type: "object" + required: + - "api_token" + - "account_id" + - "replication_start_date" + additionalProperties: false + properties: + api_token: + title: "API Token" + description: "Harvest API Token." + airbyte_secret: true + type: "string" + account_id: + title: "Account ID" + description: "Harvest account ID. 
Required for all Harvest requests in pair\ + \ with API Key" + airbyte_secret: true + type: "string" + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-hubspot:0.1.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Hubspot Source Spec" + type: "object" + required: + - "start_date" + - "credentials" + additionalProperties: false + properties: + start_date: + type: "string" + title: "Replication start date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + credentials: + title: "Authentication mechanism" + description: "Choose either to provide the API key or the OAuth2.0 credentials" + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Hubspot (Oauth)" + required: + - "redirect_uri" + - "client_id" + - "client_secret" + - "refresh_token" + - "access_token" + - "credentials_title" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Name of the credentials set" + const: "OAuth Credentials" + enum: + - "OAuth Credentials" + default: "OAuth Credentials" + order: 0 + client_id: + title: "Client ID" + description: "Hubspot client_id. See our docs if you need help finding this id." + type: "string" + examples: + - "123456789000" + client_secret: + title: "Client Secret" + description: "Hubspot client_secret. See our docs if you need help finding this secret." + type: "string" + examples: + - "secret" + airbyte_secret: true + refresh_token: + title: "Refresh token" + description: "Hubspot refresh_token. See our docs if you need help generating the token." + type: "string" + examples: + - "refresh_token" + airbyte_secret: true + - type: "object" + title: "API key" + required: + - "api_key" + - "credentials_title" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Name of the credentials set" + const: "API Key Credentials" + enum: + - "API Key Credentials" + default: "API Key Credentials" + order: 0 + api_key: + title: "API key" + description: "Hubspot API Key. See our docs if you need help finding this key." 
+ type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "refresh_token" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-db2:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/db2" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "IBM Db2 Source Spec" + type: "object" + required: + - "host" + - "port" + - "db" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host of the Db2." + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + db: + description: "Name of the database." + type: "string" + examples: + - "default" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-instagram:0.1.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/instagram" + changelogUrl: "https://docs.airbyte.io/integrations/sources/instagram" + connectionSpecification: + title: "Source Instagram" + type: "object" + properties: + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for User\ + \ Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after\ + \ this date will be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + format: "date-time" + access_token: + title: "Access Token" + description: "The value of the access token generated. See the docs for\ + \ more information" + airbyte_secret: true + type: "string" + required: + - "start_date" + - "access_token" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-intercom:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/intercom" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Intercom Spec" + type: "object" + required: + - "access_token" + - "start_date" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Intercom Access Token. See the docs for more information on how to obtain this key." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Intercom\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." 
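Editor's note: the HubSpot spec above accepts either OAuth credentials or a plain API key, told apart by the constant `credentials_title` field. Secrets below are placeholders; note that the OAuth branch's required list in the spec also names redirect_uri and access_token, omitted here for brevity.

    # The two credential shapes from the HubSpot spec above, plus a top-level config.
    api_key_credentials = {
        "credentials_title": "API Key Credentials",
        "api_key": "placeholder-api-key",
    }

    oauth_credentials = {
        "credentials_title": "OAuth Credentials",
        "client_id": "123456789000",                # the spec's example value
        "client_secret": "placeholder-secret",
        "refresh_token": "placeholder-refresh-token",
        # redirect_uri and access_token also appear in the spec's required list.
    }

    hubspot_config = {
        "start_date": "2017-01-25T00:00:00Z",
        "credentials": api_key_credentials,  # or oauth_credentials
    }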
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-iterable:0.1.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/iterable" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Iterable Spec" + type: "object" + required: + - "start_date" + - "api_key" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Iterable,\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated." + examples: + - "2021-04-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + api_key: + type: "string" + description: "Iterable API Key. See the docs for more information on how to obtain this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-jira:0.2.14" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/jira" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Jira Spec" + type: "object" + required: + - "api_token" + - "domain" + - "email" + additionalProperties: true + properties: + api_token: + type: "string" + description: "Jira API Token. See the docs for more information on how to generate this key." + airbyte_secret: true + domain: + type: "string" + examples: + - "domainname.atlassian.net" + pattern: "^[a-zA-Z0-9._-]*\\.atlassian\\.net$" + description: "Domain for your Jira account, e.g. airbyteio.atlassian.net" + email: + type: "string" + description: "The user email for your Jira account" + projects: + type: "array" + title: "Projects" + items: + type: "string" + examples: + - "PROJ1" + - "PROJ2" + description: "Comma-separated list of Jira project keys to replicate data\ + \ for" + start_date: + type: "string" + title: "Start Date" + description: "The date from which you'd like to replicate data for Jira\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated. Note that it will be used only in the following\ + \ incremental streams: issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + additional_fields: + type: "array" + title: "Additional Fields" + items: + type: "string" + description: "Comma-separated list of additional fields to include in replicating\ + \ issues" + examples: + - "Field A" + - "Field B" + expand_issue_changelog: + type: "boolean" + title: "Expand Issue Changelog" + description: "Expand the changelog when replicating issues" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-kafka:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/kafka" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Kafka Source Spec" + type: "object" + required: + - "bootstrap_servers" + - "subscription" + - "protocol" + additionalProperties: false + properties: + bootstrap_servers: + title: "Bootstrap servers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Kafka cluster. 
The client will make use of all servers\ + \ irrespective of which servers are specified here for bootstrapping—this\ + \ list only impacts the initial hosts used to discover the full set of\ + \ servers. This list should be in the form host1:port1,host2:port2,....\ + \ Since these servers are just used for the initial connection to discover\ + \ the full cluster membership (which may change dynamically), this list\ + \ need not contain the full set of servers (you may want more than one,\ + \ though, in case a server is down)." + type: "string" + examples: + - "kafka-broker1:9092,kafka-broker2:9092" + subscription: + title: "Subscribe method" + type: "object" + description: "You can choose to manually assign a list of partitions, or\ + \ subscribe to all topics matching specified pattern to get dynamically\ + \ assigned partitions" + oneOf: + - title: "Manually assign a list of partitions" + required: + - "subscription_type" + - "topic_partitions" + properties: + subscription_type: + description: "Manually assign a list of partitions to this consumer.\ + \ This interface does not allow for incremental assignment and will\ + \ replace the previous assignment (if there is one).\nIf the given\ + \ list of topic partitions is empty, it is treated the same as unsubscribe()." + type: "string" + const: "assign" + enum: + - "assign" + default: "assign" + topic_partitions: + title: "List of topic:partition pairs" + type: "string" + examples: + - "sample.topic:0, sample.topic:1" + - title: "Subscribe to all topics matching specified pattern" + required: + - "subscription_type" + - "topic_pattern" + properties: + subscription_type: + description: "Topic pattern from which the records will be read." + type: "string" + const: "subscribe" + enum: + - "subscribe" + default: "subscribe" + topic_pattern: + title: "Topic pattern" + type: "string" + examples: + - "sample.topic" + test_topic: + title: "Test topic" + description: "Topic to test if Airbyte can consume messages." + type: "string" + examples: + - "test.topic" + group_id: + title: "Group ID" + description: "Group id." + type: "string" + examples: + - "group.id" + max_poll_records: + title: "Max poll records" + description: "The maximum number of records returned in a single call to\ + \ poll(). Note, that max_poll_records does not impact the underlying fetching\ + \ behavior. The consumer will cache the records from each fetch request\ + \ and returns them incrementally from each poll." + type: "integer" + default: 500 + protocol: + title: "Protocol" + type: "object" + description: "Protocol used to communicate with brokers." + oneOf: + - title: "PLAINTEXT" + required: + - "security_protocol" + properties: + security_protocol: + type: "string" + enum: + - "PLAINTEXT" + default: "PLAINTEXT" + - title: "SASL PLAINTEXT" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_PLAINTEXT" + default: "SASL_PLAINTEXT" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "PLAIN" + enum: + - "PLAIN" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." 
+ type: "string" + default: "" + airbyte_secret: true + - title: "SASL SSL" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_SSL" + default: "SASL_SSL" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "GSSAPI" + enum: + - "GSSAPI" + - "OAUTHBEARER" + - "SCRAM-SHA-256" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + client_id: + title: "Client ID" + description: "An id string to pass to the server when making requests. The\ + \ purpose of this is to be able to track the source of requests beyond\ + \ just ip/port by allowing a logical application name to be included in\ + \ server-side request logging." + type: "string" + examples: + - "airbyte-consumer" + enable_auto_commit: + title: "Enable auto commit" + description: "If true the consumer's offset will be periodically committed\ + \ in the background." + type: "boolean" + default: true + auto_commit_interval_ms: + title: "Auto commit interval ms" + description: "The frequency in milliseconds that the consumer offsets are\ + \ auto-committed to Kafka if enable.auto.commit is set to true." + type: "integer" + default: 5000 + client_dns_lookup: + title: "Client DNS lookup" + description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ + \ connect to each returned IP address in sequence until a successful connection\ + \ is established. After a disconnection, the next IP is used. Once all\ + \ IPs have been used once, the client resolves the IP(s) from the hostname\ + \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ + \ bootstrap address into a list of canonical names. After the bootstrap\ + \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ + \ attempt to connect to the first IP address returned by the lookup, even\ + \ if the lookup returns multiple IP addresses." + type: "string" + default: "use_all_dns_ips" + enum: + - "default" + - "use_all_dns_ips" + - "resolve_canonical_bootstrap_servers_only" + retry_backoff_ms: + title: "Retry backoff ms" + description: "The amount of time to wait before attempting to retry a failed\ + \ request to a given topic partition. This avoids repeatedly sending requests\ + \ in a tight loop under some failure scenarios." + type: "integer" + default: 100 + request_timeout_ms: + title: "Request timeout ms" + description: "The configuration controls the maximum amount of time the\ + \ client will wait for the response of a request. If the response is not\ + \ received before the timeout elapses the client will resend the request\ + \ if necessary or fail the request if retries are exhausted." + type: "integer" + default: 30000 + receive_buffer_bytes: + title: "Receive buffer bytes" + description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ + \ reading data. If the value is -1, the OS default will be used." 
+ type: "integer" + default: 32768 + auto_offset_reset: + title: "Auto offset reset" + description: "What to do when there is no initial offset in Kafka or if\ + \ the current offset does not exist any more on the server - earliest:\ + \ automatically reset the offset to the earliest offset, latest: automatically\ + \ reset the offset to the latest offset, none: throw exception to the\ + \ consumer if no previous offset is found for the consumer's group, anything\ + \ else: throw exception to the consumer." + type: "string" + default: "latest" + enum: + - "latest" + - "earliest" + - "none" + repeated_calls: + title: "Repeated calls" + description: "The number of repeated calls to poll() if no messages were\ + \ received." + type: "integer" + default: 3 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + supported_source_sync_modes: + - "append" +- dockerImage: "airbyte/source-klaviyo:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" + changelogUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" + connectionSpecification: + title: "Klaviyo Spec" + type: "object" + properties: + api_key: + title: "Api Key" + description: "Klaviyo API Key. See our docs if you need help finding this key." + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + required: + - "api_key" + - "start_date" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-lever-hiring:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring" + changelogUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring#changelog" + connectionSpecification: + title: "Lever Hiring Spec" + type: "object" + properties: + client_id: + title: "Client Id" + description: "The client application id as provided when registering the\ + \ application with Lever." + type: "string" + client_secret: + title: "Client Secret" + description: "The application secret as provided when registering the application\ + \ with Lever." + airbyte_secret: true + type: "string" + refresh_token: + title: "Refresh Token" + description: "The refresh token your application will need to submit to\ + \ get a new access token after it's expired." + type: "string" + environment: + title: "Environment" + description: "Sandbox or Production environment." + default: "Production" + enum: + - "Sandbox" + - "Production" + type: "string" + start_date: + title: "Start Date" + description: "UTC date and time in the format 2019-02-25T00:00:00Z. Any\ + \ data before this date will not be replicated." 
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-04-25T00:00:00Z" + type: "string" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "refresh_token" + oauthFlowOutputParameters: [] +- dockerImage: "airbyte/source-linkedin-ads:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/linkedin-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Linkedin Ads Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + title: "Start Date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + description: "Date in the format 2020-09-17. Any data before this date will\ + \ not be replicated." + examples: + - "2021-05-17" + access_token: + type: "string" + title: "Access Token" + description: "The token value ganerated using Auth Code" + airbyte_secret: true + account_ids: + title: "Account IDs" + type: "array" + description: "Specify the Account IDs separated by space, from which to\ + \ pull the data. Leave empty to pull from all associated accounts." + items: + type: "integer" + default: [] + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-looker:0.2.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/looker" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Looker Spec" + type: "object" + required: + - "domain" + - "client_id" + - "client_secret" + additionalProperties: false + properties: + domain: + type: "string" + examples: + - "domainname.looker.com" + - "looker.clientname.com" + - "123.123.124.123:8000" + description: "Domain for your Looker account, e.g. airbyte.cloud.looker.com,looker.[clientname].com,IP\ + \ address" + client_id: + title: "Client ID" + type: "string" + description: "The Client ID is first part of an API3 key that is specific\ + \ to each Looker user. See the docs for more information on how to generate this key." + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret is second part of an API3 key." + run_look_ids: + title: "Look IDs to Run" + type: "array" + items: + type: "string" + pattern: "^[0-9]*$" + description: "The IDs of any Looks to run (optional)" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mailchimp:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mailchimp" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Mailchimp Spec" + type: "object" + required: + - "username" + - "apikey" + additionalProperties: false + properties: + username: + type: "string" + description: "The Username or email you use to sign into Mailchimp" + apikey: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." 
+ supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-marketo:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/marketo" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Marketo Spec" + type: "object" + required: + - "domain_url" + - "client_id" + - "client_secret" + - "start_date" + additionalProperties: false + properties: + domain_url: + type: "string" + description: "Your Marketo Base URL. See the docs for info on how to obtain this." + examples: + - "https://000-AAA-000.mktorest.com" + airbyte_secret: true + client_id: + type: "string" + description: "Your Marketo client_id. See the docs for info on how to obtain this." + airbyte_secret: true + client_secret: + type: "string" + description: "Your Marketo client secret. See the docs for info on how to obtain this." + airbyte_secret: true + start_date: + type: "string" + description: "Data generated in Marketo after this date will be replicated.\ + \ This date must be specified in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2020-09-25T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + window_in_days: + type: "integer" + description: "The number of days for each data chunk, beginning from start_date.\ + \ (Min=1, as for a Day; Max=30, as for a Month)." + examples: + - 1 + - 5 + - 10 + - 15 + - 30 + default: 30 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mssql:0.3.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MSSQL Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + description: "Hostname of the database." + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + examples: + - "1433" + database: + description: "Name of the database." + type: "string" + examples: + - "master" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + ssl_method: + title: "SSL Method" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "ssl_method" + properties: + ssl_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Encrypted (trust server certificate)" + additionalProperties: false + description: "Use the cert provided by the server without verification.\ + \ (For testing purposes only!)" + required: + - "ssl_method" + properties: + ssl_method: + type: "string" + const: "encrypted_trust_server_certificate" + enum: + - "encrypted_trust_server_certificate" + default: "encrypted_trust_server_certificate" + - title: "Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server."
+ required: + - "ssl_method" + - "trustStoreName" + - "trustStorePassword" + properties: + ssl_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + hostNameInCertificate: + title: "Host Name In Certificate" + type: "string" + description: "Specifies the host name of the server. The value of\ + \ this property must match the subject property of the certificate." + order: 7 + replication_method: + type: "string" + title: "Replication Method" + description: "Replication method to use for extracting data from the database.\ + \ STANDARD replication requires no setup on the DB side but will not be\ + \ able to represent deletions incrementally. CDC uses {TBC} to detect\ + \ inserts, updates, and deletes. This needs to be configured on the source\ + \ database itself." + default: "STANDARD" + enum: + - "STANDARD" + - "CDC" + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-microsoft-teams:0.2.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/microsoft-teams" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Microsoft Teams Spec" + type: "object" + required: + - "tenant_id" + - "client_id" + - "client_secret" + - "period" + additionalProperties: false + properties: + tenant_id: + title: "Directory (tenant) ID" + type: "string" + description: "Directory (tenant) ID" + client_id: + title: "Application (client) ID" + type: "string" + description: "Application (client) ID" + client_secret: + title: "Client Secret" + type: "string" + description: "Client secret" + airbyte_secret: true + period: + type: "string" + description: "Specifies the length of time over which the Team Device Report\ + \ stream is aggregated. The supported values are: D7, D30, D90, and D180." + examples: + - "D7" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mixpanel:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mixpanel" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Mixpanel Spec" + type: "object" + required: + - "api_secret" + additionalProperties: true + properties: + api_secret: + type: "string" + description: "Mixpanel API Secret. See the docs for more information on how to obtain this key." + airbyte_secret: true + attribution_window: + type: "integer" + description: "Latency minimum number of days to look-back to account for\ + \ delays in attributing accurate results. Default attribution window is\ + \ 5 days." + default: 5 + date_window_size: + type: "integer" + description: "Number of days for date window looping through transactional\ + \ endpoints with from_date and to_date. Default date_window_size is 30\ + \ days. Clients with large volumes of events may want to decrease this\ + \ to 14, 7, or even down to 1-2 days." + default: 30 + project_timezone: + type: "string" + description: "Time zone in which integer date times are stored. The project\ + \ timezone may be found in the project settings in the Mixpanel console." + default: "US/Pacific" + examples: + - "US/Pacific" + - "UTC" + select_properties_by_default: + type: "boolean" + description: "Setting this config parameter to true ensures that new properties\ + \ on events and engage records are captured. Otherwise new properties\ + \ will be ignored" + default: true + start_date: + type: "string" + description: "The default value to use if no bookmark exists for an endpoint.\ + \ Default is 1 year ago." 
+ examples: + - "2021-11-16" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)?$" + region: + type: "string" + enum: + - "US" + - "EU" + default: "US" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mongodb-v2:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" + changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MongoDb Source Spec" + type: "object" + required: + - "database" + additionalProperties: true + properties: + instance_type: + type: "object" + title: "MongoDb instance type" + description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ + \ Set, a TLS connection is used by default." + order: 0 + oneOf: + - title: "Standalone MongoDb Instance" + required: + - "instance" + - "host" + - "port" + properties: + instance: + type: "string" + enum: + - "standalone" + default: "standalone" + host: + title: "Host" + type: "string" + description: "Host of a Mongo database to be replicated." + order: 0 + port: + title: "Port" + type: "integer" + description: "Port of a Mongo database to be replicated." + minimum: 0 + maximum: 65536 + default: 27017 + examples: + - "27017" + order: 1 + tls: + title: "TLS connection" + type: "boolean" + description: "Indicates whether TLS encryption protocol will be used\ + \ to connect to MongoDB. It is recommended to use TLS connection\ + \ if possible. For more information see documentation." + default: false + order: 2 + - title: "Replica Set" + required: + - "instance" + - "server_addresses" + properties: + instance: + type: "string" + enum: + - "replica" + default: "replica" + server_addresses: + title: "Server addresses" + type: "string" + description: "The members of a replica set. Please specify `host`:`port`\ + \ of each member separated by commas." + examples: + - "host1:27017,host2:27017,host3:27017" + order: 0 + replica_set: + title: "Replica Set" + type: "string" + description: "A replica set name." + order: 1 + - title: "MongoDB Atlas" + additionalProperties: false + required: + - "instance" + - "cluster_url" + properties: + instance: + type: "string" + enum: + - "atlas" + default: "atlas" + cluster_url: + title: "Cluster URL" + type: "string" + description: "URL of a cluster to connect to." + order: 0 + database: + title: "Database name" + type: "string" + description: "Database to be replicated." + order: 1 + user: + title: "User" + type: "string" + description: "User" + order: 2 + password: + title: "Password" + type: "string" + description: "Password" + airbyte_secret: true + order: 3 + auth_source: + title: "Authentication source" + type: "string" + description: "Authentication source where user information is stored" + default: "admin" + examples: + - "admin" + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mysql:0.4.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mysql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MySql Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "replication_method" + additionalProperties: false + properties: + host: + description: "Hostname of the database." + type: "string" + order: 0 + port: + description: "Port of the database."
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 3306 + examples: + - "3306" + order: 1 + database: + description: "Name of the database." + type: "string" + order: 2 + username: + description: "Username to use to access the database." + type: "string" + order: 3 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + jdbc_url_params: + description: "Additional properties to pass to the jdbc url string when\ + \ connecting to the database formatted as 'key=value' pairs separated\ + \ by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)" + type: "string" + order: 5 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + order: 7 + replication_method: + type: "string" + title: "Replication Method" + description: "Replication method to use for extracting data from the database.\ + \ STANDARD replication requires no setup on the DB side but will not be\ + \ able to represent deletions incrementally. CDC uses the Binlog to detect\ + \ inserts, updates, and deletes. This needs to be configured on the source\ + \ database itself." + order: 6 + default: "STANDARD" + enum: + - "STANDARD" + - "CDC" + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-okta:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/okta" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Okta Spec" + type: "object" + required: + - "token" + - "base_url" + additionalProperties: false + properties: + token: + type: "string" + title: "API Token" + description: "A Okta token. See the docs for instructions on how to generate it." + airbyte_secret: true + base_url: + type: "string" + title: "Base URL" + description: "The Okta base URL." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-onesignal:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/onesignal" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "OneSignal Source Spec" + type: "object" + required: + - "user_auth_key" + - "start_date" + - "outcome_names" + additionalProperties: false + properties: + user_auth_key: + type: "string" + description: "OneSignal User Auth Key, see the docs for more information on how to obtain this key." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for OneSignal\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + outcome_names: + type: "string" + description: "Comma-separated list of names and the value (sum/count) for\ + \ the returned outcome data. See the docs for more details" + examples: + - "os__session_duration.count,os__click.count,CustomOutcomeName.sum" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-oracle:0.3.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/oracle" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Oracle Source Spec" + type: "object" + required: + - "host" + - "port" + - "sid" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + port: + title: "Port" + description: "Port of the database.\nOracle Corporations recommends the\ + \ following port numbers:\n1521 - Default listening port for client connections\ + \ to the listener. \n2484 - Recommended and officially registered listening\ + \ port for client connections to the listener using TCP/IP with SSL" + type: "integer" + minimum: 0 + maximum: 65536 + default: 1521 + sid: + title: "SID (Oracle System Identifier)" + type: "string" + username: + title: "User" + description: "Username to use to access the database." + type: "string" + password: + title: "Password" + description: "Password associated with the username." 
+ type: "string" + airbyte_secret: true + schemas: + title: "Schemas" + description: "List of schemas to sync from. Defaults to user. Case sensitive." + type: "array" + items: + type: "string" + minItems: 1 + uniqueItems: true + encryption: + title: "Encryption" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Native Network Ecryption (NNE)" + additionalProperties: false + description: "Native network encryption gives you the ability to encrypt\ + \ database connections, without the configuration overhead of TCP/IP\ + \ and SSL/TLS and without the need to open and listen on different ports." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "client_nne" + enum: + - "client_nne" + default: "client_nne" + encryption_algorithm: + type: "string" + description: "This parameter defines the encryption algorithm to be\ + \ used" + title: "Encryption Algorithm" + default: "AES256" + enum: + - "AES256" + - "RC4_56" + - "3DES168" + - title: "TLS Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "encryption_method" + - "ssl_certificate" + properties: + encryption_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + ssl_certificate: + title: "SSL PEM file" + description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ + \ containers frequently used in certificate installations" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." 
+ type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-paypal-transaction:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/paypal-transactions" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Paypal Transaction Search" + type: "object" + required: + - "client_id" + - "secret" + - "start_date" + - "is_sandbox" + additionalProperties: true + properties: + client_id: + title: "Client ID" + type: "string" + description: "The Paypal Client ID for API credentials" + secret: + title: "Secret" + type: "string" + description: "The Secret for a given Client ID." + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + description: "Start Date for data extraction in ISO format. 
Date must be in range from 3 years till 12 hrs before\ + \ present time" + examples: + - "2021-06-11T23:59:59-00:00" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}$" + is_sandbox: + title: "Is Sandbox" + description: "Whether or not to Sandbox or Production environment to extract\ + \ data from" + type: "boolean" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pipedrive:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pipedrive" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pipedrive Spec" + type: "object" + required: + - "replication_start_date" + additionalProperties: true + properties: + authorization: + type: "object" + title: "Authentication Type" + oneOf: + - title: "Sign in via Pipedrive (OAuth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "API Key Authentication" + required: + - "auth_type" + - "api_token" + properties: + auth_type: + type: "string" + const: "Token" + enum: + - "Token" + default: "Token" + order: 0 + api_token: + title: "API Token" + type: "string" + description: "Pipedrive API Token" + airbyte_secret: true + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated. When specified and not\ + \ None, then stream will behave as incremental" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-plaid:0.2.1" + spec: + documentationUrl: "https://plaid.com/docs/api/" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + type: "object" + required: + - "access_token" + - "api_key" + - "client_id" + additionalProperties: false + properties: + access_token: + type: "string" + title: "Access Token" + description: "The end-user's Link access token." + api_key: + title: "API Key" + type: "string" + description: "The Plaid API key to use to hit the API." 
+ airbyte_secret: true + client_id: + title: "Client ID" + type: "string" + description: "The Plaid client id" + plaid_env: + title: "Plaid Environment" + type: "string" + enum: + - "sandbox" + - "development" + - "production" + description: "The Plaid environment" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pokeapi:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pokeapi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pokeapi Spec" + type: "object" + required: + - "pokemon_name" + additionalProperties: false + properties: + pokemon_name: + type: "string" + description: "Pokemon requested from the API." + pattern: "^[a-z0-9_\\-]+$" + examples: + - "ditto, luxray, snorlax" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-posthog:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/posthog" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "PostHog Spec" + type: "object" + required: + - "api_key" + - "start_date" + additionalProperties: false + properties: + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-01-01T00:00:00Z" + api_key: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." + base_url: + type: "string" + default: "https://app.posthog.com" + description: "Base PostHog url. Defaults to PostHog Cloud (https://app.posthog.com)." + examples: + - "https://posthog.example.com" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-postgres:0.3.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Postgres Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "Connect using SSL" + description: "Encrypt client/server communications for increased security." + type: "boolean" + default: false + order: 5 + replication_method: + type: "object" + title: "Replication Method" + description: "Replication method to use for extracting data from the database." + order: 6 + oneOf: + - title: "Standard" + additionalProperties: false + description: "Standard replication requires no setup on the DB side but\ + \ will not be able to represent deletions incrementally." 
+ required: + - "method" + properties: + method: + type: "string" + const: "Standard" + enum: + - "Standard" + default: "Standard" + order: 0 + - title: "Logical Replication (CDC)" + additionalProperties: false + description: "Logical replication uses the Postgres write-ahead log (WAL)\ + \ to detect inserts, updates, and deletes. This needs to be configured\ + \ on the source database itself. Only available on Postgres 10 and above.\ + \ Read the Postgres Source docs for more information." + required: + - "method" + - "replication_slot" + - "publication" + properties: + method: + type: "string" + const: "CDC" + enum: + - "CDC" + default: "CDC" + order: 0 + plugin: + type: "string" + description: "A logical decoding plug-in installed on the PostgreSQL\ + \ server. `pgoutput` plug-in is used by default.\nIf replication\ + \ table contains a lot of big jsonb values it is recommended to\ + \ use `wal2json` plug-in. For more information about `wal2json`\ + \ plug-in read Postgres Source docs." + enum: + - "pgoutput" + - "wal2json" + default: "pgoutput" + order: 1 + replication_slot: + type: "string" + description: "A plug-in logical replication slot." + order: 2 + publication: + type: "string" + description: "A Postgres publication used for consuming changes." + order: 3 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-prestashop:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "PrestaShop Spec" + type: "object" + required: + - "url" + - "access_key" + additionalProperties: false + properties: + url: + type: "string" + description: "Shop URL without trailing slash (domain name or IP address)" + access_key: + type: "string" + description: "Your PrestaShop access key. See the docs for info on how to obtain this." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-quickbooks-singer:0.1.3" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Quickbooks Singer Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "realm_id" + - "user_agent" + - "start_date" + - "sandbox" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Identifies which app is making the request. Obtain this value\ + \ from the Keys tab on the app profile via My Apps on the developer site.\ + \ There are two versions of this key: development and production" + client_secret: + description: " Obtain this value from the Keys tab on the app profile via\ + \ My Apps on the developer site. There are two versions of this key: development\ + \ and production" + type: "string" + airbyte_secret: true + refresh_token: + description: "A token used when refreshing the access token." + type: "string" + airbyte_secret: true + realm_id: + description: "Labeled Company ID. The Make API Calls panel is populated\ + \ with the realm id and the current access token" + type: "string" + airbyte_secret: true + user_agent: + type: "string" + description: "Process and email for API logging purposes. Example: tap-quickbooks\ + \ " + start_date: + description: "The default value to use if no bookmark exists for an endpoint\ + \ (rfc3339 date string) E.g, 2021-03-20T00:00:00Z" + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-03-20T00:00:00Z" + sandbox: + description: "Development or Production." + type: "boolean" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-recharge:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/recharge" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Recharge Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Recharge\ + \ API, in the format YYYY-MM-DDT00:00:00Z." 
+ examples: + - "2021-05-14T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + access_token: + type: "string" + description: "The value of the Access Token generated. See the docs for more\ + \ information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-recurly:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/recurly" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Recurly Source Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Recurly API Key. See the docs for more information on how to generate this key." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-redshift:0.3.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Redshift Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ + \ region and end with .redshift.amazonaws.com)" + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5439 + examples: + - "5439" + database: + description: "Name of the database." + type: "string" + examples: + - "master" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-s3:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" + changelogUrl: "https://docs.airbyte.io/integrations/sources/s3" + connectionSpecification: + title: "S3 Source Spec" + type: "object" + properties: + dataset: + title: "Dataset" + description: "This source creates one table per connection, this field is\ + \ the name of that table. This should include only letters, numbers, dash\ + \ and underscores. Note that this may be altered according to destination." + pattern: "^([A-Za-z0-9-_]+)$" + type: "string" + path_pattern: + title: "Path Pattern" + description: "Add at least 1 pattern here to match filepaths against. Use\ + \ | to separate multiple patterns. Airbyte uses these patterns to determine\ + \ which files to pick up from the provider storage. See wcmatch.glob to understand pattern syntax (GLOBSTAR\ + \ and SPLIT flags are enabled). Use pattern ** to pick\ + \ up all files." + examples: + - "**" + - "myFolder/myTableFiles/*.csv|myFolder/myOtherTableFiles/*.csv" + type: "string" + schema: + title: "Schema" + description: "Optionally provide a schema to enforce, as a valid JSON string.\ + \ Ensure this is a mapping of { \"column\" : \"type\" },\ + \ where types are valid JSON Schema datatypes. Leave as {} to auto-infer\ + \ the schema." 
+ default: "{}" + examples: + - "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"\ + array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}" + type: "string" + format: + title: "Format" + default: "csv" + type: "object" + oneOf: + - title: "csv" + description: "This connector utilises PyArrow (Apache Arrow) for CSV parsing." + type: "object" + properties: + filetype: + title: "Filetype" + const: "csv" + type: "string" + delimiter: + title: "Delimiter" + description: "The character delimiting individual cells in the CSV\ + \ data. This may only be a 1-character string." + default: "," + minLength: 1 + type: "string" + quote_char: + title: "Quote Char" + description: "The character used optionally for quoting CSV values.\ + \ To disallow quoting, make this field blank." + default: "\"" + type: "string" + escape_char: + title: "Escape Char" + description: "The character used optionally for escaping special characters.\ + \ To disallow escaping, leave this field blank." + type: "string" + encoding: + title: "Encoding" + description: "The character encoding of the CSV data. Leave blank\ + \ to default to UTF-8. See list of python encodings for allowable options." + type: "string" + double_quote: + title: "Double Quote" + description: "Whether two quotes in a quoted CSV value denote a single\ + \ quote in the data." + default: true + type: "boolean" + newlines_in_values: + title: "Newlines In Values" + description: "Whether newline characters are allowed in CSV values.\ + \ Turning this on may affect performance. Leave blank to default\ + \ to False." + default: false + type: "boolean" + block_size: + title: "Block Size" + description: "The chunk size in bytes to process at a time in memory\ + \ from each file. If your data is particularly wide and failing\ + \ during schema detection, increasing this should solve it. Beware\ + \ of raising this too high as you could hit OOM errors." + default: 10000 + type: "integer" + additional_reader_options: + title: "Additional Reader Options" + description: "Optionally add a valid JSON string here to provide additional\ + \ options to the csv reader. Mappings must correspond to options\ + \ detailed here. 'column_types' is used internally\ + \ to handle schema so overriding that would likely cause problems." + default: "{}" + examples: + - "{\"timestamp_parsers\": [\"%m/%d/%Y %H:%M\", \"%Y/%m/%d %H:%M\"\ + ], \"strings_can_be_null\": true, \"null_values\": [\"NA\", \"NULL\"\ + ]}" + type: "string" + advanced_options: + title: "Advanced Options" + description: "Optionally add a valid JSON string here to provide additional\ + \ Pyarrow ReadOptions. Specify 'column_names'\ + \ here if your CSV doesn't have header, or if you want to use custom\ + \ column names. 'block_size' and 'encoding' are already used above,\ + \ specify them again here will override the values above." + default: "{}" + examples: + - "{\"column_names\": [\"column1\", \"column2\"]}" + type: "string" + - title: "parquet" + description: "This connector utilises PyArrow (Apache Arrow) for Parquet parsing." + type: "object" + properties: + filetype: + title: "Filetype" + const: "parquet" + type: "string" + buffer_size: + title: "Buffer Size" + description: "Perform read buffering when deserializing individual\ + \ column chunks. By default every group column will be loaded fully\ + \ to memory. This option can help to optimize a work with memory\ + \ if your data is particularly wide or failing during detection\ + \ of OOM errors." 
+ default: 0 + type: "integer" + columns: + title: "Columns" + description: "If you only want to sync a subset of the columns from\ + \ the file(s), add the columns you want here. Leave it empty to\ + \ sync all columns." + type: "array" + items: + type: "string" + batch_size: + title: "Batch Size" + description: "Maximum number of records per batch. Batches may be\ + \ smaller if there aren’t enough rows in the file. This option can\ + \ help to optimize a work with memory if your data is particularly\ + \ wide or failing during detection of OOM errors." + default: 65536 + type: "integer" + provider: + title: "S3: Amazon Web Services" + type: "object" + properties: + bucket: + title: "Bucket" + description: "Name of the S3 bucket where the file(s) exist." + type: "string" + aws_access_key_id: + title: "Aws Access Key Id" + description: "In order to access private Buckets stored on AWS S3, this\ + \ connector requires credentials with the proper permissions. If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + type: "string" + aws_secret_access_key: + title: "Aws Secret Access Key" + description: "In order to access private Buckets stored on AWS S3, this\ + \ connector requires credentials with the proper permissions. If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + type: "string" + path_prefix: + title: "Path Prefix" + description: "By providing a path-like prefix (e.g. myFolder/thisTable/)\ + \ under which all the relevant files sit, we can optimise finding\ + \ these in S3. This is optional but recommended if your bucket contains\ + \ many folders/files." + default: "" + type: "string" + endpoint: + title: "Endpoint" + description: "Endpoint to an S3 compatible service. Leave empty to use\ + \ AWS." + default: "" + type: "string" + use_ssl: + title: "Use Ssl" + description: "Is remote server using secure SSL/TLS connection" + type: "boolean" + verify_ssl_cert: + title: "Verify Ssl Cert" + description: "Allow self signed certificates" + type: "boolean" + required: + - "bucket" + required: + - "dataset" + - "path_pattern" + - "provider" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-salesloft:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/salesloft" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Salesloft Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Salesloft client id." + client_secret: + type: "string" + description: "Salesloft client secret." + airbyte_secret: true + refresh_token: + type: "string" + description: "Salesloft refresh token." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Salesloft\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." 
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-salesforce:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Salesforce Source Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + - "api_type" + additionalProperties: false + properties: + client_id: + description: "The Consumer Key that can be found when viewing your app in\ + \ Salesforce" + type: "string" + client_secret: + description: "The Consumer Secret that can be found when viewing your app\ + \ in Salesforce" + type: "string" + airbyte_secret: true + refresh_token: + description: "Salesforce Refresh Token used for Airbyte to access your Salesforce\ + \ account. If you don't know what this is, follow this guide to retrieve it." + type: "string" + airbyte_secret: true + start_date: + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-07-25T00:00:00Z" + is_sandbox: + description: "Whether or not the the app is in a Salesforce sandbox. If\ + \ you do not know what this, assume it is false. We provide more info\ + \ on this field in the docs." + type: "boolean" + default: false + api_type: + description: "Unless you know that you are transferring a very small amount\ + \ of data, prefer using the BULK API. This will help avoid using up all\ + \ of your API call quota with Salesforce. Valid values are BULK or REST." + type: "string" + enum: + - "BULK" + - "REST" + default: "BULK" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-sendgrid:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/sendgrid" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Sendgrid Spec" + type: "object" + required: + - "apikey" + additionalProperties: false + properties: + apikey: + type: "string" + description: "API Key, use admin to generate this key." + start_time: + type: "integer" + description: "Start time in timestamp integer format. Any data before this\ + \ timestamp will not be replicated." + examples: + - 1558359837 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-shopify:0.1.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Shopify Source CDK Specifications" + type: "object" + required: + - "shop" + - "start_date" + - "auth_method" + additionalProperties: false + properties: + shop: + type: "string" + description: "The name of the shopify store. For https://EXAMPLE.myshopify.com,\ + \ the shop name is 'EXAMPLE'." + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DD." 
+ examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + auth_method: + title: "Shopify Authorization Method" + type: "object" + oneOf: + - type: "object" + title: "OAuth2.0" + required: + - "client_id" + - "client_secret" + - "access_token" + properties: + auth_method: + type: "string" + const: "access_token" + enum: + - "access_token" + default: "access_token" + order: 0 + client_id: + type: "string" + description: "The API Key of the Shopify developer application." + airbyte_secret: true + client_secret: + type: "string" + description: "The API Secret the Shopify developer application." + airbyte_secret: true + access_token: + type: "string" + description: "Access Token for making authenticated requests." + airbyte_secret: true + - title: "API Password" + type: "object" + required: + - "api_password" + properties: + auth_method: + type: "string" + const: "api_password" + enum: + - "api_password" + default: "api_password" + order: 0 + api_password: + type: "string" + description: "The API PASSWORD for your private application in `Shopify`\ + \ shop." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "auth_method" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-shortio:0.1.0" + spec: + documentationUrl: "https://developers.short.io/reference" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Shortio Spec" + type: "object" + required: + - "domain_id" + - "secret_key" + - "start_date" + additionalProperties: false + properties: + domain_id: + type: "string" + description: "Domain ID" + airbyte_secret: false + secret_key: + type: "string" + description: "Short.io Secret key" + airbyte_secret: true + start_date: + type: "string" + description: "Start Date, YYYY-MM-DD" + airbyte_secret: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-slack:0.1.12" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/slack" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Slack Spec" + type: "object" + required: + - "start_date" + - "lookback_window" + - "join_channels" + additionalProperties: true + properties: + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + title: "Start Date" + lookback_window: + type: "integer" + title: "Threads Lookback window (Days)" + description: "How far into the past to look for messages in threads." + examples: + - 7 + - 14 + join_channels: + type: "boolean" + default: true + title: "Join all channels" + description: "Whether to join all channels or to sync data only from channels\ + \ the bot is already in. If false, you'll need to manually add the bot\ + \ to all the channels from which you'd like to sync messages. 
" + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate into Slack" + type: "object" + oneOf: + - type: "object" + title: "Sign in via Slack (OAuth)" + required: + - "access_token" + - "client_id" + - "client_secret" + - "option_title" + properties: + option_title: + type: "string" + const: "Default OAuth2.0 authorization" + client_id: + title: "Client ID" + description: "Slack client_id. See our docs if you need help finding this id." + type: "string" + examples: + - "slack-client-id-example" + client_secret: + title: "Client Secret" + description: "Slack client_secret. See our docs if you need help finding this secret." + type: "string" + examples: + - "slack-client-secret-example" + airbyte_secret: true + access_token: + title: "Access token" + description: "Slack access_token. See our docs if you need help generating the token." + type: "string" + examples: + - "slack-access-token-example" + airbyte_secret: true + refresh_token: + title: "Refresh token" + description: "Slack refresh_token. See our docs if you need help generating the token." + type: "string" + examples: + - "slack-refresh-token-example" + airbyte_secret: true + order: 0 + - type: "object" + title: "API Token" + required: + - "api_token" + - "option_title" + properties: + option_title: + type: "string" + const: "API Token Credentials" + api_token: + type: "string" + title: "API Token" + description: "A Slack bot token. See the docs for instructions on how to generate it." + airbyte_secret: true + order: 1 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-smartsheets:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/smartsheets" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Smartsheets Source Spec" + type: "object" + required: + - "access_token" + - "spreadsheet_id" + additionalProperties: false + properties: + access_token: + title: "API Access token" + description: "Found in Profile > Apps & Integrations > API Access within\ + \ Smartsheet app" + type: "string" + airbyte_secret: true + spreadsheet_id: + title: "Smartsheet ID" + description: "Found in File > Properties" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-snapchat-marketing:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/snapchat-marketing" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snapchat Marketing Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + additionalProperties: false + properties: + client_id: + title: "Client ID" + type: "string" + description: "The Snapchat Client ID for API credentials." + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret for a given Client ID." + airbyte_secret: true + refresh_token: + title: "API Refresh Token" + type: "string" + description: "Refresh Token to get next api key after expiration. 
Is given\ + \ with API Key" + airbyte_secret: true + start_date: + title: "Start Date" + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-snowflake:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snowflake Source Spec" + type: "object" + required: + - "host" + - "role" + - "warehouse" + - "database" + - "schema" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host domain of the snowflake instance (must include the account,\ + \ region, cloud environment, and end with snowflakecomputing.com)." + examples: + - "accountname.us-east-2.aws.snowflakecomputing.com" + type: "string" + title: "Account name" + order: 0 + role: + description: "The role you created for Airbyte to access Snowflake." + examples: + - "AIRBYTE_ROLE" + type: "string" + title: "Role" + order: 1 + warehouse: + description: "The warehouse you created for Airbyte to access data into." + examples: + - "AIRBYTE_WAREHOUSE" + type: "string" + title: "Warehouse" + order: 2 + database: + description: "The database you created for Airbyte to access data into." + examples: + - "AIRBYTE_DATABASE" + type: "string" + title: "Database" + order: 3 + schema: + description: "The source Snowflake schema tables." + examples: + - "AIRBYTE_SCHEMA" + type: "string" + title: "Schema" + order: 4 + username: + description: "The username you created to allow Airbyte to access the database." + examples: + - "AIRBYTE_USER" + type: "string" + title: "Username" + order: 5 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + order: 6 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-square:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/square" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Square Source CDK Specifications" + type: "object" + required: + - "api_key" + - "is_sandbox" + additionalProperties: false + properties: + api_key: + type: "string" + description: "The API key for a Square application" + airbyte_secret: true + is_sandbox: + type: "boolean" + description: "Determines the sandbox (true) or production (false) API version" + examples: + - true + - false + default: true + start_date: + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." 
+ examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + include_deleted_objects: + type: "boolean" + description: "In some streams there is and option to include deleted objects\ + \ (Items, Categories, Discounts, Taxes)" + examples: + - true + - false + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-strava:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Strava Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "athlete_id" + - "start_date" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Strava Client ID" + pattern: "^[0-9_\\-]+$" + examples: + - "12345" + client_secret: + type: "string" + description: "Strava Client Secret" + pattern: "^[0-9a-fA-F]+$" + examples: + - "fc6243f283e51f6ca989aab298b17da125496f50" + airbyte_secret: true + refresh_token: + type: "string" + description: "Strava Refresh Token with activity:read_all permissions" + pattern: "^[0-9a-fA-F]+$" + examples: + - "fc6243f283e51f6ca989aab298b17da125496f50" + airbyte_secret: true + athlete_id: + type: "integer" + description: "Strava Athlete ID" + pattern: "^[0-9_\\-]+$" + examples: + - "17831421" + start_date: + type: "string" + description: "Start Query Timestamp in UTC" + examples: + - "2016-12-31 23:59:59" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-stripe:0.1.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Stripe Source Spec" + type: "object" + required: + - "client_secret" + - "account_id" + - "start_date" + additionalProperties: false + properties: + client_secret: + type: "string" + pattern: "^(s|r)k_(live|test)_[a-zA-Z0-9]+$" + description: "Stripe API key (usually starts with 'sk_live_'; find yours\ + \ here)." + airbyte_secret: true + account_id: + type: "string" + description: "Your Stripe account ID (starts with 'acct_', find yours here)." + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + lookback_window_days: + type: "integer" + title: "Lookback Window (in days)" + default: 0 + minimum: 0 + description: "When set, the connector will always reload data from the past\ + \ N days, where N is the value set here. This is useful if your data is\ + \ updated after creation." 
+ supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-surveymonkey:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/surveymonkey" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "SurveyMonkey Spec" + type: "object" + required: + - "start_date" + additionalProperties: true + properties: + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z?$" + examples: + - "2021-01-01T00:00:00Z" + credentials: + type: "object" + title: "Authentication Type" + oneOf: + - title: "Authenticate via OAuth" + type: "object" + required: + - "client_id" + - "client_secret" + - "access_token" + - "auth_type" + properties: + auth_type: + type: "string" + const: "OAuth" + enum: + - "OAuth" + default: "OAuth" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "Token Authentication" + additionalProperties: false + required: + - "access_token" + - "auth_type" + properties: + auth_type: + type: "string" + const: "Token" + enum: + - "Token" + default: "Token" + order: 0 + access_token: + type: "string" + airbyte_secret: true + description: "API Token. See the docs for information on how to generate this key." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-tempo:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Tempo Spec" + type: "object" + required: + - "api_token" + additionalProperties: false + properties: + api_token: + type: "string" + description: "Tempo API Token." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-tiktok-marketing:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" + changelogUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" + connectionSpecification: + title: "TikTok Marketing Source Spec" + type: "object" + properties: + environment: + title: "Environment" + default: "Production" + oneOf: + - title: "Production" + type: "object" + properties: + environment: + title: "Environment" + const: "prod" + type: "string" + app_id: + title: "App Id" + description: "The App id applied by the developer." + type: "string" + secret: + title: "Secret" + description: "The private key of the developer's application." 
+ airbyte_secret: true + type: "string" + required: + - "app_id" + - "secret" + - title: "Sandbox" + type: "object" + properties: + environment: + title: "Environment" + const: "sandbox" + type: "string" + advertiser_id: + title: "Advertiser Id" + description: "The Advertiser ID which generated for the developer's\ + \ Sandbox application." + type: "string" + required: + - "advertiser_id" + type: "object" + access_token: + title: "Access Token" + description: "Long-term Authorized Access Token." + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "Start Date in format: YYYY-MM-DD." + default: "01-09-2016" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + type: "string" + required: + - "access_token" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-trello:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/trello" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Trello Spec" + type: "object" + required: + - "token" + - "key" + - "start_date" + additionalProperties: true + properties: + token: + type: "string" + title: "API token" + description: "A Trello token. See the docs for instructions on how to generate it." + airbyte_secret: true + key: + type: "string" + title: "API key" + description: "A Trello token. See the docs for instructions on how to generate it." + airbyte_secret: true + start_date: + type: "string" + title: "Start date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-03-01T00:00:00.000Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "token" + - - "key" +- dockerImage: "airbyte/source-twilio:0.1.1" + spec: + documentationUrl: "https://hub.docker.com/r/airbyte/source-twilio" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Twilio Spec" + type: "object" + required: + - "account_sid" + - "auth_token" + - "start_date" + additionalProperties: false + properties: + account_sid: + title: "Account ID" + description: "Twilio account SID" + airbyte_secret: true + type: "string" + auth_token: + title: "Auth Token" + description: "Twilio Auth Token." + airbyte_secret: true + type: "string" + start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2020-10-01T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-typeform:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/typeform" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Typeform Spec" + type: "object" + required: + - "token" + - "start_date" + additionalProperties: true + properties: + start_date: + type: "string" + description: "The date you would like to replicate data. 
Format: YYYY-MM-DDTHH:mm:ss[Z]." + examples: + - "2020-01-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + token: + type: "string" + description: "The API Token for a Typeform account." + airbyte_secret: true + form_ids: + title: "Form IDs to replicate" + description: "When this parameter is set, the connector will replicate data\ + \ only from the input forms. Otherwise, all forms in your Typeform account\ + \ will be replicated. You can find form IDs in your form URLs. For example,\ + \ in the URL \"https://mysite.typeform.com/to/u6nXL7\" the form_id is\ + \ u6nXL7. You can find form URLs on Share panel" + type: "array" + items: + type: "string" + uniqueItems: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-us-census:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/us-census" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "https://api.census.gov/ Source Spec" + type: "object" + required: + - "api_key" + - "query_path" + additionalProperties: false + properties: + query_params: + type: "string" + description: "The query parameters portion of the GET request, without the\ + \ api key" + pattern: "^\\w+=[\\w,:*]+(&(?!key)\\w+=[\\w,:*]+)*$" + examples: + - "get=NAME,NAICS2017_LABEL,LFO_LABEL,EMPSZES_LABEL,ESTAB,PAYANN,PAYQTR1,EMP&for=us:*&NAICS2017=72&LFO=001&EMPSZES=001" + - "get=MOVEDIN,GEOID1,GEOID2,MOVEDOUT,FULL1_NAME,FULL2_NAME,MOVEDNET&for=county:*" + query_path: + type: "string" + description: "The path portion of the GET request" + pattern: "^data(\\/[\\w\\d]+)+$" + examples: + - "data/2019/cbp" + - "data/2018/acs" + - "data/timeseries/healthins/sahie" + api_key: + type: "string" + description: "Your API Key. Get your key here." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-chat:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-chat" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Chat Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Chat API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-02-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + access_token: + type: "string" + description: "The value of the Access Token generated. See the docs for\ + \ more information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-sunshine:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk_sunshine" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Sunshine Spec" + type: "object" + required: + - "api_token" + - "email" + - "start_date" + - "subdomain" + additionalProperties: false + properties: + api_token: + type: "string" + airbyte_secret: true + description: "API Token. See the docs for information on how to generate this key." 
+ email: + type: "string" + description: "The user email for your Zendesk account" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Account" + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: "2021-01-01T00:00:00.000000Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-support:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-support" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Zendesk Support Spec" + type: "object" + required: + - "start_date" + - "subdomain" + - "auth_method" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Support API, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ after this date will be replicated." + examples: + - "2020-10-15T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Support" + auth_method: + title: "ZenDesk Authorization Method" + type: "object" + default: "api_token" + description: "Zendesk service provides 2 auth method: API token and oAuth2.\ + \ Now only the first one is available. Another one will be added in the\ + \ future" + oneOf: + - title: "API Token" + type: "object" + required: + - "email" + - "api_token" + additionalProperties: false + properties: + auth_method: + type: "string" + const: "api_token" + email: + type: "string" + description: "The user email for your Zendesk account" + api_token: + type: "string" + description: "The value of the API token generated. See the docs\ + \ for more information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-talk:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-talk" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Talk Spec" + type: "object" + required: + - "start_date" + - "subdomain" + - "access_token" + - "email" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Talk API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-04-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Talk" + access_token: + type: "string" + description: "The value of the API token generated. See the docs for more information" + airbyte_secret: true + email: + type: "string" + description: "The user email for your Zendesk account" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zoom-singer:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zoom" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Zoom Singer Spec" + type: "object" + required: + - "jwt" + additionalProperties: false + properties: + jwt: + title: "JWT Token" + type: "string" + description: "Zoom JWT Token. 
See the docs for more information on how to obtain this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zuora:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zuora" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zuora Connector Configuration" + type: "object" + required: + - "start_date" + - "tenant_endpoint" + - "data_query" + - "client_id" + - "client_secret" + properties: + start_date: + type: "string" + title: "Start Date" + description: "Start Date in format: YYYY-MM-DD" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + window_in_days: + type: "string" + title: "Query Window (in days)" + description: "The amount of days for each data-chunk begining from start_date.\ + \ Bigger the value - faster the fetch. (0.1 - as for couple of hours,\ + \ 1 - as for a Day; 364 - as for a Year)." + examples: + - "0.5" + - "1" + - "30" + - "60" + - "90" + - "120" + - "200" + - "364" + pattern: "^(0|[1-9]\\d*)(\\.\\d+)?$" + default: "90" + tenant_endpoint: + title: "Tenant Endpoint Location" + type: "string" + description: "Please choose the right endpoint where your Tenant is located.\ + \ More info by this Link" + enum: + - "US Production" + - "US Cloud Production" + - "US API Sandbox" + - "US Cloud API Sandbox" + - "US Central Sandbox" + - "US Performance Test" + - "EU Production" + - "EU API Sandbox" + - "EU Central Sandbox" + data_query: + title: "Data Query Type" + type: "string" + description: "Choose between `Live`, or `Unlimited` - the optimized, replicated\ + \ database at 12 hours freshness for high volume extraction Link" + enum: + - "Live" + - "Unlimited" + default: "Live" + client_id: + type: "string" + title: "Client ID" + description: "Your OAuth user Client ID" + airbyte_secret: true + client_secret: + type: "string" + title: "Client Secret" + description: "Your OAuth user Client Secret" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] diff --git a/airbyte-config/models/build.gradle b/airbyte-config/models/build.gradle index 271b3fe685e7e..d62c88c7e1633 100644 --- a/airbyte-config/models/build.gradle +++ b/airbyte-config/models/build.gradle @@ -7,10 +7,11 @@ plugins { dependencies { implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') + implementation project(':airbyte-commons') } jsonSchema2Pojo { - sourceType = SourceType.YAMLSCHEMA + sourceType = SourceType.YAMLSCHEMA source = files("${sourceSets.main.output.resourcesDir}/types") targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') diff --git a/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml new file mode 100644 index 0000000000000..0d3becf8e74c6 --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml @@ -0,0 +1,16 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml +title: DockerImageSpec +description: docker image name and the connector specification associated with it +type: object +required: + - dockerImage + - spec +additionalProperties: false +properties: + dockerImage: + type: string + spec: + type: object + existingJavaType: 
io.airbyte.protocol.models.ConnectorSpecification
diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java
index 19035ed42295d..2181bcb162640 100644
--- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java
+++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java
@@ -366,7 +366,14 @@ ConnectorCounter updateConnectorDefinitions(final DSLContext ctx,
       final ConnectorInfo connectorInfo = connectorRepositoryToIdVersionMap.get(repository);
       final JsonNode currentDefinition = connectorInfo.definition;
-      final Set<String> newFields = getNewFields(currentDefinition, latestDefinition);
+
+      // todo (lmossman) - this logic to remove the "spec" field is temporary; it is necessary to avoid
+      // breaking users who are actively using an old connector version, otherwise specs from the most
+      // recent connector versions may be inserted into the db which could be incompatible with the
+      // version they are actually using.
+      // Once the faux major version bump has been merged, this "new field" logic will be removed
+      // entirely.
+      final Set<String> newFields = Sets.difference(getNewFields(currentDefinition, latestDefinition), Set.of("spec"));

       // Process connector in use
       if (connectorRepositoriesInUse.contains(repository)) {
diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java
index 3bca71d57c2db..2902d15ef1e97 100644
--- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java
+++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java
@@ -5,8 +5,10 @@
 package io.airbyte.config.persistence;

 import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.io.Resources;
+import io.airbyte.commons.docker.DockerUtils;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.util.MoreIterators;
 import io.airbyte.commons.yaml.Yamls;
@@ -19,6 +21,7 @@
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -45,11 +48,40 @@ public static YamlSeedConfigPersistence get(final Class seedDefinitionsResour
     return new YamlSeedConfigPersistence(seedDefinitionsResourceClass);
   }

-  private YamlSeedConfigPersistence(final Class<?> seedDefinitionsResourceClass) throws IOException {
+  private YamlSeedConfigPersistence(final Class<?> seedResourceClass) throws IOException {
+    final Map<String, JsonNode> sourceDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_SOURCE_DEFINITION);
+    final Map<String, JsonNode> sourceSpecConfigs = getConfigs(seedResourceClass, SeedType.SOURCE_SPEC);
+    final Map<String, JsonNode> fullSourceDefinitionConfigs = sourceDefinitionConfigs.entrySet().stream()
+        .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), sourceSpecConfigs)));
+
+    final Map<String, JsonNode> destinationDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION);
+    final Map<String, JsonNode> destinationSpecConfigs = getConfigs(seedResourceClass, SeedType.DESTINATION_SPEC);
+    final Map<String, JsonNode> fullDestinationDefinitionConfigs = destinationDefinitionConfigs.entrySet().stream()
+        .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), destinationSpecConfigs)));
+
     this.allSeedConfigs = ImmutableMap.<SeedType, Map<String, JsonNode>>builder()
-        .put(SeedType.STANDARD_SOURCE_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_SOURCE_DEFINITION))
-        .put(SeedType.STANDARD_DESTINATION_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION))
-        .build();
+        .put(SeedType.STANDARD_SOURCE_DEFINITION, fullSourceDefinitionConfigs)
+        .put(SeedType.STANDARD_DESTINATION_DEFINITION, fullDestinationDefinitionConfigs).build();
+  }
+
+  /**
+   * Merges the corresponding spec JSON into the definition JSON. This is necessary because specs are
+   * stored in a separate resource file from definitions.
+   *
+   * @param definitionJson JSON of connector definition that is missing a spec
+   * @param specConfigs map of docker image to JSON of docker image/connector spec pair
+   * @return JSON of connector definition including the connector spec
+   */
+  private JsonNode mergeSpecIntoDefinition(final JsonNode definitionJson, final Map<String, JsonNode> specConfigs) {
+    final String dockerImage = DockerUtils.getTaggedImageName(
+        definitionJson.get("dockerRepository").asText(),
+        definitionJson.get("dockerImageTag").asText());
+    final JsonNode specConfigJson = specConfigs.get(dockerImage);
+    if (specConfigJson == null || specConfigJson.get("spec") == null) {
+      throw new UnsupportedOperationException(String.format("There is no seed spec for docker image %s", dockerImage));
+    }
+    ((ObjectNode) definitionJson).set("spec", specConfigJson.get("spec"));
+    return definitionJson;
+  }

   @SuppressWarnings("UnstableApiUsage")
diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java
index 8a740ba535688..a6f261628046c 100644
--- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java
+++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java
@@ -15,6 +15,7 @@
 import io.airbyte.config.StandardSync;
 import io.airbyte.config.StandardWorkspace;
 import java.io.IOException;
+import java.net.URI;
 import java.util.Collections;
 import java.util.Map;
 import java.util.stream.Stream;
@@ -41,6 +42,7 @@ public void testGetConfig() throws Exception {
     assertEquals("airbyte/source-mysql", mysqlSource.getDockerRepository());
     assertEquals("https://docs.airbyte.io/integrations/sources/mysql", mysqlSource.getDocumentationUrl());
     assertEquals("mysql.svg", mysqlSource.getIcon());
+    assertEquals(URI.create("https://docs.airbyte.io/integrations/sources/mysql"), mysqlSource.getSpec().getDocumentationUrl());

     // destination
     final String s3DestinationId = "4816b78f-1489-44c1-9060-4b19d5fa9362";
@@ -50,13 +52,16 @@
     assertEquals("S3", s3Destination.getName());
     assertEquals("airbyte/destination-s3", s3Destination.getDockerRepository());
     assertEquals("https://docs.airbyte.io/integrations/destinations/s3", s3Destination.getDocumentationUrl());
+    assertEquals(URI.create("https://docs.airbyte.io/integrations/destinations/s3"), s3Destination.getSpec().getDocumentationUrl());
   }

   @Test
   public void testGetInvalidConfig() {
-    assertThrows(UnsupportedOperationException.class,
+    assertThrows(
+        UnsupportedOperationException.class,
         () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SYNC, "invalid_id", StandardSync.class));
-    assertThrows(ConfigNotFoundException.class,
+    assertThrows(
+        ConfigNotFoundException.class,
         () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "invalid_id", StandardWorkspace.class));
   }

diff --git a/airbyte-config/specs/README.md b/airbyte-config/specs/README.md
new file mode 100644
index 0000000000000..8d043e1ec9729
--- /dev/null
+++ b/airbyte-config/specs/README.md
@@ -0,0 +1,16 @@
+# Generating Seed Connector Specs
+
+The catalog of seeded connector definitions is stored and manually updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml`
+files. These manually-maintained connector definitions intentionally _do not_ contain the connector specs, in an effort to keep these files
+human-readable and easily editable, and because specs can be fetched automatically.
+
+This automatic fetching of connector specs is the job of the SeedConnectorSpecGenerator. This class reads the connector definitions in
+the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, fetches the corresponding specs from the GCS bucket cache, and writes the
+specs to the `airbyte-config/init/src/main/resources/seed/*_specs.yaml` files. See the
+[SeedConnectorSpecGenerator](src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java) class for more details.
+
+Therefore, whenever a connector definition is updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, the
+SeedConnectorSpecGenerator should be re-run to regenerate the connector spec files. To do so,
+run `./gradlew :airbyte-config:init:processResources` (or simply build the platform project) and commit the resulting changes to your PR.
+If you skip this step, the CI build will fail because the generated spec files checked into your branch will differ from what the
+generator produces for your updated definitions.
diff --git a/airbyte-config/specs/build.gradle b/airbyte-config/specs/build.gradle
new file mode 100644
index 0000000000000..91d1fd0921706
--- /dev/null
+++ b/airbyte-config/specs/build.gradle
@@ -0,0 +1,24 @@
+plugins {
+    id 'java'
+}
+
+dependencies {
+    implementation 'commons-cli:commons-cli:1.4'
+
+    implementation project(':airbyte-commons')
+    implementation project(':airbyte-commons-cli')
+    implementation project(':airbyte-config:models')
+    implementation project(':airbyte-protocol:models')
+    implementation project(':airbyte-json-validation')
+}
+
+task generateSeedConnectorSpecs(type: JavaExec, dependsOn: compileJava) {
+    classpath = sourceSets.main.runtimeClasspath
+
+    mainClass = 'io.airbyte.config.specs.SeedConnectorSpecGenerator'
+
+    args '--seed-root'
+    args new File(project(":airbyte-config:init").projectDir, '/src/main/resources/seed')
+}
+
+project(":airbyte-config:init").tasks.processResources.dependsOn(generateSeedConnectorSpecs)
diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java
new file mode 100644
index 0000000000000..832326c551c46
--- /dev/null
+++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.config.specs;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.api.client.util.Preconditions;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Storage;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.AirbyteProtocolSchema;
+import io.airbyte.protocol.models.ConnectorSpecification;
+import io.airbyte.validation.json.JsonSchemaValidator;
+import io.airbyte.validation.json.JsonValidationException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.util.Optional;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class GcsBucketSpecFetcher {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(GcsBucketSpecFetcher.class);
+
+  private final Storage storage;
+  private final String bucketName;
+
+  public GcsBucketSpecFetcher(final Storage storage, final String bucketName) {
+    this.storage = storage;
+    this.bucketName = bucketName;
+  }
+
+  public String getBucketName() {
+    return bucketName;
+  }
+
+  public Optional<ConnectorSpecification> attemptFetch(final String dockerImage) {
+    final String[] dockerImageComponents = dockerImage.split(":");
+    Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalid docker image: " + dockerImage);
+    final String dockerImageName = dockerImageComponents[0];
+    final String dockerImageTag = dockerImageComponents[1];
+
+    final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json");
+    LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath);
+    final Blob specAsBlob = storage.get(bucketName, specPath.toString());
+
+    // if null it means the object was not found.
+    if (specAsBlob == null) {
+      LOGGER.debug("Spec not found in bucket storage");
+      return Optional.empty();
+    }
+
+    final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8);
+    try {
+      validateConfig(Jsons.deserialize(specAsString));
+    } catch (final JsonValidationException e) {
+      LOGGER.error("Received invalid spec from bucket store. {}", e.toString());
+      return Optional.empty();
+    }
+    return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class));
+  }
+
+  private static void validateConfig(final JsonNode json) throws JsonValidationException {
+    final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator();
+    final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification");
+    jsonSchemaValidator.ensure(specJsonSchema, json);
+  }
+
+}
diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java
new file mode 100644
index 0000000000000..05a273b08d84a
--- /dev/null
+++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.config.specs; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.storage.StorageOptions; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.cli.Clis; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.commons.yaml.Yamls; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.EnvConfigs; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This script is responsible for ensuring that up-to-date {@link ConnectorSpecification}s for every + * connector definition in the seed are stored in a corresponding resource file, for the purpose of + * seeding the specs into the config database on server startup. See + * ./airbyte-config/specs/readme.md for more details on how this class is run and how it fits into + * the project. + *

+ * Specs are stored in a separate file from the definitions in an effort to keep the definitions + * yaml files human-readable and easily-editable, as specs can be rather large. + *
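+ * For example (abridged from the generated source_specs.yaml entries elsewhere in this change; the
+ * elided fields are only omitted here for brevity), each generated entry pairs a docker image with
+ * the spec that was fetched for it:
+ *
+ * <pre>
+ * - dockerImage: "airbyte/source-recurly:0.2.4"
+ *   spec:
+ *     documentationUrl: "https://docs.airbyte.io/integrations/sources/recurly"
+ *     connectionSpecification: { ... }
+ * </pre>
+ *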

+ * Specs are fetched from the GCS spec cache bucket, so if any specs are missing from the bucket
+ * then this will fail. Note that this script only pulls specs from the bucket cache; it never
+ * pushes specs to the bucket. Since this script runs at build time, the decision was to depend on
+ * the bucket cache rather than running a docker container to fetch the spec during the build, which
+ * could be slow and unwieldy. If there is a failure, check the bucket cache and figure out how to
+ * get the correct spec in there.
+ */
+public class SeedConnectorSpecGenerator {
+
+  private static final String DOCKER_REPOSITORY_FIELD = "dockerRepository";
+  private static final String DOCKER_IMAGE_TAG_FIELD = "dockerImageTag";
+  private static final String DOCKER_IMAGE_FIELD = "dockerImage";
+  private static final String SPEC_FIELD = "spec";
+  private static final String SPEC_BUCKET_NAME = new EnvConfigs().getSpecCacheBucket();
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(SeedConnectorSpecGenerator.class);
+
+  private static final Option SEED_ROOT_OPTION = Option.builder("s").longOpt("seed-root").hasArg(true).required(true)
+      .desc("path to where seed resource files are stored").build();
+  private static final Options OPTIONS = new Options().addOption(SEED_ROOT_OPTION);
+
+  private final GcsBucketSpecFetcher bucketSpecFetcher;
+
+  public SeedConnectorSpecGenerator(final GcsBucketSpecFetcher bucketSpecFetcher) {
+    this.bucketSpecFetcher = bucketSpecFetcher;
+  }
+
+  public static void main(final String[] args) throws Exception {
+    final CommandLine parsed = Clis.parse(args, OPTIONS);
+    final Path outputRoot = Path.of(parsed.getOptionValue(SEED_ROOT_OPTION.getOpt()));
+
+    final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), SPEC_BUCKET_NAME);
+    final SeedConnectorSpecGenerator seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcher);
+    seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.SOURCE);
+    seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.DESTINATION);
+  }
+
+  public void run(final Path seedRoot, final SeedConnectorType seedConnectorType) throws IOException {
+    LOGGER.info("Updating seeded {} definition specs if necessary...", seedConnectorType.name());
+
+    final JsonNode seedDefinitionsJson = yamlToJson(seedRoot, seedConnectorType.getDefinitionFileName());
+    final JsonNode seedSpecsJson = yamlToJson(seedRoot, seedConnectorType.getSpecFileName());
+
+    final List<DockerImageSpec> updatedSeedSpecs = fetchUpdatedSeedSpecs(seedDefinitionsJson, seedSpecsJson);
+
+    final String outputString = String.format("# This file is generated by %s.\n", this.getClass().getName())
+        + "# Do NOT edit this file directly. See generator class for more details.\n"
See generator class for more details.\n" + + Yamls.serialize(updatedSeedSpecs); + final Path outputPath = IOs.writeFile(seedRoot.resolve(seedConnectorType.getSpecFileName()), outputString); + + LOGGER.info("Finished updating {}", outputPath); + } + + private JsonNode yamlToJson(final Path root, final String fileName) { + final String yamlString = IOs.readFile(root, fileName); + return Yamls.deserialize(yamlString); + } + + @VisibleForTesting + final List fetchUpdatedSeedSpecs(final JsonNode seedDefinitions, final JsonNode currentSeedSpecs) { + final List seedDefinitionsDockerImages = MoreIterators.toList(seedDefinitions.elements()) + .stream() + .map(json -> String.format("%s:%s", json.get(DOCKER_REPOSITORY_FIELD).asText(), json.get(DOCKER_IMAGE_TAG_FIELD).asText())) + .collect(Collectors.toList()); + + final Map currentSeedImageToSpec = MoreIterators.toList(currentSeedSpecs.elements()) + .stream() + .collect(Collectors.toMap( + json -> json.get(DOCKER_IMAGE_FIELD).asText(), + json -> new DockerImageSpec().withDockerImage(json.get(DOCKER_IMAGE_FIELD).asText()) + .withSpec(Jsons.object(json.get(SPEC_FIELD), ConnectorSpecification.class)))); + + return seedDefinitionsDockerImages + .stream() + .map(dockerImage -> currentSeedImageToSpec.containsKey(dockerImage) ? currentSeedImageToSpec.get(dockerImage) : fetchSpecFromGCS(dockerImage)) + .collect(Collectors.toList()); + } + + private DockerImageSpec fetchSpecFromGCS(final String dockerImage) { + LOGGER.info("Seeded spec not found for docker image {} - fetching from GCS bucket {}...", dockerImage, bucketSpecFetcher.getBucketName()); + final ConnectorSpecification spec = bucketSpecFetcher.attemptFetch(dockerImage) + .orElseThrow(() -> new RuntimeException(String.format( + "Failed to fetch valid spec file for docker image %s from GCS bucket %s", + dockerImage, + bucketSpecFetcher.getBucketName()))); + return new DockerImageSpec().withDockerImage(dockerImage).withSpec(spec); + } + +} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java new file mode 100644 index 0000000000000..36d1326af215b --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +public enum SeedConnectorType { + + SOURCE( + "source_definitions.yaml", + "source_specs.yaml"), + DESTINATION( + "destination_definitions.yaml", + "destination_specs.yaml"); + + private final String definitionFileName; + private final String specFileName; + + SeedConnectorType(final String definitionFileName, + final String specFileName) { + this.definitionFileName = definitionFileName; + this.specFileName = specFileName; + } + + public String getDefinitionFileName() { + return definitionFileName; + } + + public String getSpecFileName() { + return specFileName; + } + +} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java new file mode 100644 index 0000000000000..25e16bea545bf --- /dev/null +++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.config.specs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class GcsBucketSpecFetcherTest { + + private static final String BUCKET_NAME = "bucket"; + private static final String DOCKER_REPOSITORY = "image"; + private static final String DOCKER_IMAGE_TAG = "0.1.0"; + private static final String DOCKER_IMAGE = DOCKER_REPOSITORY + ":" + DOCKER_IMAGE_TAG; + private static final String SPEC_PATH = Path.of("specs").resolve(DOCKER_REPOSITORY).resolve(DOCKER_IMAGE_TAG).resolve("spec.json").toString(); + + private Storage storage; + private Blob specBlob; + private final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); + + @SuppressWarnings("unchecked") + @BeforeEach + void setup() throws IOException { + storage = mock(Storage.class); + + final byte[] specBytes = Jsons.toBytes(Jsons.jsonNode(spec)); + specBlob = mock(Blob.class); + when(specBlob.getContent()).thenReturn(specBytes); + } + + @Test + void testGetsSpecIfPresent() throws IOException { + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(specBlob); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isPresent()); + assertEquals(spec, returnedSpec.get()); + } + + @Test + void testReturnsEmptyIfNotPresent() throws IOException { + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(null); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isEmpty()); + } + + @Test + void testReturnsEmptyIfInvalidSpec() throws IOException { + final Blob invalidSpecBlob = mock(Blob.class); + when(invalidSpecBlob.getContent()).thenReturn("{\"notASpec\": true}".getBytes(StandardCharsets.UTF_8)); + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(invalidSpecBlob); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isEmpty()); + } + +} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java new file mode 100644 index 0000000000000..0925608a2f62f --- /dev/null +++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.config.specs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class SeedConnectorSpecGeneratorTest { + + private static final UUID DEF_ID1 = java.util.UUID.randomUUID(); + private static final UUID DEF_ID2 = java.util.UUID.randomUUID(); + private static final String CONNECTOR_NAME1 = "connector1"; + private static final String CONNECTOR_NAME2 = "connector2"; + private static final String DOCUMENTATION_URL = "https://wwww.example.com"; + private static final String DOCKER_REPOSITORY1 = "airbyte/connector1"; + private static final String DOCKER_REPOSITORY2 = "airbyte/connector2"; + private static final String DOCKER_TAG1 = "0.1.0"; + private static final String DOCKER_TAG2 = "0.2.0"; + private static final String BUCKET_NAME = "bucket"; + + private SeedConnectorSpecGenerator seedConnectorSpecGenerator; + private GcsBucketSpecFetcher bucketSpecFetcherMock; + + @BeforeEach + void setup() { + bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); + when(bucketSpecFetcherMock.getBucketName()).thenReturn(BUCKET_NAME); + + seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcherMock); + } + + @Test + void testMissingSpecIsFetched() { + final StandardDestinationDefinition sourceDefinition1 = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); + final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); + + final StandardDestinationDefinition sourceDefinition2 = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID2) + .withDockerRepository(DOCKER_REPOSITORY2) + .withDockerImageTag(DOCKER_TAG2) + .withName(CONNECTOR_NAME2) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); + + final JsonNode seedDefinitions = Jsons.jsonNode(Arrays.asList(sourceDefinition1, sourceDefinition2)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(dockerImageSpec1)); + + when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(spec2)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = Arrays.asList(dockerImageSpec1, dockerImageSpec2); + + 
assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testOutdatedSpecIsFetched() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG2) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification outdatedSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of( + "foo1", + "bar1"))); + final DockerImageSpec outdatedDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1) + .withSpec(outdatedSpec); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(outdatedDockerImageSpec)); + + final ConnectorSpecification newSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec newDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2).withSpec(newSpec); + + when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(newSpec)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(newDockerImageSpec); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testExtraneousSpecIsRemoved() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); + final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); + + final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(Arrays.asList(dockerImageSpec1, dockerImageSpec2)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(dockerImageSpec1); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testNoFetchIsPerformedIfAllSpecsUpToDate() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); + final DockerImageSpec dockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = 
Jsons.jsonNode(List.of(dockerImageSpec)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(dockerImageSpec); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + verify(bucketSpecFetcherMock, never()).attemptFetch(any()); + } + +} diff --git a/airbyte-json-validation/build.gradle b/airbyte-json-validation/build.gradle index 4881fc079a464..2114a4a053e8b 100644 --- a/airbyte-json-validation/build.gradle +++ b/airbyte-json-validation/build.gradle @@ -6,4 +6,6 @@ dependencies { implementation 'com.networknt:json-schema-validator:1.0.42' // needed so that we can follow $ref when parsing json. jackson does not support this natively. implementation 'me.andrz.jackson:jackson-json-reference-core:0.3.2' + + implementation project(':airbyte-commons') } diff --git a/airbyte-protocol/models/build.gradle b/airbyte-protocol/models/build.gradle index e4199332b848d..85f8d48cac5ab 100644 --- a/airbyte-protocol/models/build.gradle +++ b/airbyte-protocol/models/build.gradle @@ -7,6 +7,8 @@ plugins { dependencies { implementation 'javax.validation:validation-api:1.1.0.Final' implementation 'org.apache.commons:commons-lang3:3.11' + + implementation project(':airbyte-commons') } jsonSchema2Pojo { diff --git a/airbyte-scheduler/client/build.gradle b/airbyte-scheduler/client/build.gradle index d90a0262c97c7..5e319c0418efa 100644 --- a/airbyte-scheduler/client/build.gradle +++ b/airbyte-scheduler/client/build.gradle @@ -5,6 +5,7 @@ plugins { dependencies { implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') + implementation project(':airbyte-config:specs') implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') implementation project(':airbyte-scheduler:models') diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java index a615643d0830e..bcdc972c2cb4a 100644 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java +++ b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java @@ -4,27 +4,17 @@ package io.airbyte.scheduler.client; -import com.fasterxml.jackson.databind.JsonNode; -import com.google.api.client.util.Preconditions; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import com.google.common.annotations.VisibleForTesting; -import io.airbyte.commons.json.Jsons; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; +import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.AirbyteCatalog; -import io.airbyte.protocol.models.AirbyteProtocolSchema; import io.airbyte.protocol.models.ConnectorSpecification; -import io.airbyte.validation.json.JsonSchemaValidator; -import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; import java.util.Optional; -import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,17 +23,15 @@ public class BucketSpecCacheSchedulerClient implements 
SynchronousSchedulerClien private static final Logger LOGGER = LoggerFactory.getLogger(BucketSpecCacheSchedulerClient.class); private final SynchronousSchedulerClient client; - private final Function> bucketSpecFetcher; + private final GcsBucketSpecFetcher bucketSpecFetcher; public BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final String bucketName) { - this( - client, - dockerImage -> attemptToFetchSpecFromBucket(StorageOptions.getDefaultInstance().getService(), bucketName, dockerImage)); + this.client = client; + this.bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), bucketName); } @VisibleForTesting - BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, - final Function> bucketSpecFetcher) { + BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final GcsBucketSpecFetcher bucketSpecFetcher) { this.client = client; this.bucketSpecFetcher = bucketSpecFetcher; } @@ -72,7 +60,7 @@ public SynchronousResponse createGetSpecJob(final String Optional cachedSpecOptional; // never want to fail because we could not fetch from off board storage. try { - cachedSpecOptional = bucketSpecFetcher.apply(dockerImage); + cachedSpecOptional = bucketSpecFetcher.attemptFetch(dockerImage); LOGGER.debug("Spec bucket cache: Call to cache did not fail."); } catch (final RuntimeException e) { cachedSpecOptional = Optional.empty(); @@ -88,38 +76,4 @@ public SynchronousResponse createGetSpecJob(final String } } - private static void validateConfig(final JsonNode json) throws JsonValidationException { - final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); - final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); - jsonSchemaValidator.ensure(specJsonSchema, json); - } - - public static Optional attemptToFetchSpecFromBucket(final Storage storage, - final String bucketName, - final String dockerImage) { - final String[] dockerImageComponents = dockerImage.split(":"); - Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalidate docker image: " + dockerImage); - final String dockerImageName = dockerImageComponents[0]; - final String dockerImageTag = dockerImageComponents[1]; - - final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); - LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); - final Blob specAsBlob = storage.get(bucketName, specPath.toString()); - - // if null it means the object was not found. - if (specAsBlob == null) { - LOGGER.debug("Spec not found in bucket storage"); - return Optional.empty(); - } - - final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); - try { - validateConfig(Jsons.deserialize(specAsString)); - } catch (final JsonValidationException e) { - LOGGER.error("Received invalid spec from bucket store. 
{}", e.toString()); - return Optional.empty(); - } - return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); - } - } diff --git a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java index cf21fd2b160df..01f4595b94685 100644 --- a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java +++ b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java @@ -10,10 +10,10 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; +import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.ConnectorSpecification; import java.io.IOException; import java.util.Optional; -import java.util.function.Function; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -21,18 +21,18 @@ class BucketSpecCacheSchedulerClientTest { private SynchronousSchedulerClient defaultClientMock; - private Function> bucketSpecFetcherMock; + private GcsBucketSpecFetcher bucketSpecFetcherMock; @SuppressWarnings("unchecked") @BeforeEach void setup() { defaultClientMock = mock(SynchronousSchedulerClient.class); - bucketSpecFetcherMock = mock(Function.class); + bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); } @Test void testGetsSpecIfPresent() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); assertEquals(new ConnectorSpecification(), client.createGetSpecJob("source-pokeapi:0.1.0").getOutput()); verifyNoInteractions(defaultClientMock); @@ -40,7 +40,7 @@ void testGetsSpecIfPresent() throws IOException { @Test void testCallsDelegateIfNotPresent() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -49,7 +49,7 @@ void testCallsDelegateIfNotPresent() throws IOException { @Test void testCallsDelegateIfException() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -62,7 +62,7 @@ void testCallsDelegateIfException() throws IOException { @Disabled @Test void testGetsSpecFromBucket() throws IOException { - 
when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); // todo (cgardens) - replace with prod bucket. final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, "cg-specs"); final ConnectorSpecification actualSpec = client.createGetSpecJob("source-pokeapi:0.1.0").getOutput(); diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle index edc7c55fb2506..a38db0edc5dbe 100644 --- a/airbyte-server/build.gradle +++ b/airbyte-server/build.gradle @@ -66,6 +66,7 @@ dependencies { implementation project(':airbyte-config:init') implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') + implementation project(':airbyte-config:specs') implementation project(':airbyte-db:lib') implementation project(":airbyte-json-validation") implementation project(':airbyte-migration') diff --git a/build.gradle b/build.gradle index 271b27c23feb4..39d9957cd26b1 100644 --- a/build.gradle +++ b/build.gradle @@ -73,7 +73,8 @@ def createSpotlessTarget = { pattern -> 'normalization_test_output', 'tools', 'secrets', - 'charts' // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. + 'charts', // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. + 'resources/seed/*_specs.yaml' ] if (System.getenv().containsKey("SUB_BUILD")) { diff --git a/settings.gradle b/settings.gradle index 4d5b7d79afa0a..44baedba5f042 100644 --- a/settings.gradle +++ b/settings.gradle @@ -42,6 +42,7 @@ include ':airbyte-workers' // reused by acceptance tests in connector base. include ':airbyte-analytics' // transitively used by airbyte-workers. include ':airbyte-config:init' // transitively used by airbyte-workers. include ':airbyte-config:persistence' // transitively used by airbyte-workers. +include ':airbyte-config:specs' // transitively used by airbyte-workers. include ':airbyte-db:jooq' // transitively used by airbyte-workers. include ':airbyte-notification' // transitively used by airbyte-workers. include ':airbyte-scheduler:models' // transitively used by airbyte-workers. 
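Taken together, this patch centralizes the GCS spec-cache lookup in the new GcsBucketSpecFetcher and has both SeedConnectorSpecGenerator and BucketSpecCacheSchedulerClient delegate to it. The sketch below is a minimal, illustrative summary of that cache-then-delegate flow, using only the methods visible in the diff (GcsBucketSpecFetcher#attemptFetch, SynchronousSchedulerClient#createGetSpecJob); the bucket name and class name are placeholders, and error handling is reduced to the "never fail because the cache could not be reached" rule from createGetSpecJob.

// Illustrative sketch, not part of the patch: how a caller can combine the GCS spec
// cache with the scheduler client. Assumes the airbyte-config:specs and
// airbyte-scheduler:client modules introduced/updated in this patch are on the classpath.
import com.google.cloud.storage.StorageOptions;
import io.airbyte.config.specs.GcsBucketSpecFetcher;
import io.airbyte.protocol.models.ConnectorSpecification;
import io.airbyte.scheduler.client.SynchronousSchedulerClient;
import java.io.IOException;
import java.util.Optional;

public class SpecCacheFlowExample {

  public static ConnectorSpecification getSpec(final SynchronousSchedulerClient delegate,
                                               final String dockerImage) throws IOException {
    // "my-spec-cache-bucket" is a placeholder; the patch itself takes the bucket name
    // from EnvConfigs#getSpecCacheBucket or passes it into the client constructor.
    final GcsBucketSpecFetcher fetcher =
        new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), "my-spec-cache-bucket");

    Optional<ConnectorSpecification> cached;
    try {
      // Cheap path: look the spec up in the GCS bucket cache.
      cached = fetcher.attemptFetch(dockerImage);
    } catch (final RuntimeException e) {
      // A cache failure should never fail the request; fall back to the delegate instead.
      cached = Optional.empty();
    }

    if (cached.isPresent()) {
      return cached.get();
    }
    // Cache miss: run the (slower) spec job for the connector image via the scheduler client.
    return delegate.createGetSpecJob(dockerImage).getOutput();
  }
}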
From 90451eb0752fa167540ad17b5d083e872bef826c Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Wed, 3 Nov 2021 09:57:56 +0200 Subject: [PATCH 22/83] =?UTF-8?q?=F0=9F=8E=89=20Source=20Hubspot:=20Migrat?= =?UTF-8?q?e=20Hubspot=20source=20to=20CDK=20structure=20(#7562)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Migrate Hubspot source to CDK structure --- .../36c891d9-4bd9-43ac-bad2-10e12756272c.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-hubspot/.dockerignore | 4 +- .../connectors/source-hubspot/.gitignore | 1 - .../connectors/source-hubspot/Dockerfile | 42 ++++++++++++++----- .../connectors/source-hubspot/build.gradle | 5 --- .../source-hubspot/{main_dev.py => main.py} | 2 +- .../source-hubspot/requirements.txt | 2 - .../connectors/source-hubspot/setup.py | 2 - .../source-hubspot/source_hubspot/api.py | 4 +- .../source-hubspot/source_hubspot/client.py | 4 +- .../source-hubspot/source_hubspot/source.py | 2 +- .../unit_tests/test_field_type_converting.py | 9 ++-- docs/integrations/sources/hubspot.md | 1 + 14 files changed, 46 insertions(+), 36 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-hubspot/.gitignore rename airbyte-integrations/connectors/source-hubspot/{main_dev.py => main.py} (83%) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index 67f13cd7fff56..cda2735c33c8f 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "Hubspot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.21", + "dockerImageTag": "0.1.22", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 3a715c5dcd57a..27c53deb2b148 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -236,7 +236,7 @@ - name: Hubspot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.22 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-hubspot/.dockerignore b/airbyte-integrations/connectors/source-hubspot/.dockerignore index 461b1bb7ee9e3..85586eba85c52 100644 --- a/airbyte-integrations/connectors/source-hubspot/.dockerignore +++ b/airbyte-integrations/connectors/source-hubspot/.dockerignore @@ -1,8 +1,6 @@ * !Dockerfile -!Dockerfile.test +!main.py !source_hubspot !setup.py !secrets -!acceptance-test-config.yml -!acceptance-test.sh diff --git a/airbyte-integrations/connectors/source-hubspot/.gitignore b/airbyte-integrations/connectors/source-hubspot/.gitignore deleted file mode 100644 index 29fffc6a50cc9..0000000000000 --- a/airbyte-integrations/connectors/source-hubspot/.gitignore +++ /dev/null 
@@ -1 +0,0 @@ -NEW_SOURCE_CHECKLIST.md diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index e2313033cfb08..40fcc091a1211 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -1,18 +1,38 @@ -FROM airbyte/integration-base-python:0.1.1 +FROM python:3.7.11-alpine3.14 as base -# Bash is installed for more convenient debugging. -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code -ENV CODE_PATH="source_hubspot" -ENV AIRBYTE_IMPL_MODULE="source_hubspot" -ENV AIRBYTE_IMPL_PATH="SourceHubspot" +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base -WORKDIR /airbyte/integration_code -COPY $CODE_PATH ./$CODE_PATH COPY setup.py ./ -RUN pip install . -ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh" +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_hubspot ./source_hubspot + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.22 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/build.gradle b/airbyte-integrations/connectors/source-hubspot/build.gradle index bdea51cec8a89..259bffb74a610 100644 --- a/airbyte-integrations/connectors/source-hubspot/build.gradle +++ b/airbyte-integrations/connectors/source-hubspot/build.gradle @@ -7,8 +7,3 @@ plugins { airbytePython { moduleDirectory 'source_hubspot' } - -dependencies { - implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) -} diff --git a/airbyte-integrations/connectors/source-hubspot/main_dev.py b/airbyte-integrations/connectors/source-hubspot/main.py similarity index 83% rename from airbyte-integrations/connectors/source-hubspot/main_dev.py rename to airbyte-integrations/connectors/source-hubspot/main.py index 6649d86ac6627..2d902157fd450 100644 --- a/airbyte-integrations/connectors/source-hubspot/main_dev.py +++ b/airbyte-integrations/connectors/source-hubspot/main.py @@ -5,7 +5,7 @@ import sys -from base_python.entrypoint import launch +from airbyte_cdk.entrypoint import launch from source_hubspot import SourceHubspot if __name__ == "__main__": diff --git a/airbyte-integrations/connectors/source-hubspot/requirements.txt b/airbyte-integrations/connectors/source-hubspot/requirements.txt index e74f41a28ce1b..7be17a56d745d 100644 --- a/airbyte-integrations/connectors/source-hubspot/requirements.txt +++ b/airbyte-integrations/connectors/source-hubspot/requirements.txt @@ -1,5 +1,3 @@ # This file is autogenerated -- only edit if you know what you are doing. 
Use setup.py for declaring dependencies. --e ../../bases/airbyte-protocol --e ../../bases/base-python -e ../../bases/source-acceptance-test -e . diff --git a/airbyte-integrations/connectors/source-hubspot/setup.py b/airbyte-integrations/connectors/source-hubspot/setup.py index d15d84cb55069..7c4c01fa6b730 100644 --- a/airbyte-integrations/connectors/source-hubspot/setup.py +++ b/airbyte-integrations/connectors/source-hubspot/setup.py @@ -7,8 +7,6 @@ MAIN_REQUIREMENTS = [ "airbyte-cdk~=0.1", - "airbyte-protocol", - "base-python", "backoff==1.11.1", "pendulum==2.1.2", "requests==2.26.0", diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index 4ee4657241be4..48ec93e7b781d 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -14,8 +14,8 @@ import backoff import pendulum as pendulum import requests +from airbyte_cdk.entrypoint import logger from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator -from base_python.entrypoint import logger from source_hubspot.errors import HubspotAccessDenied, HubspotInvalidAuth, HubspotRateLimited, HubspotTimeout # The value is obtained experimentally, Hubspot allows the URL length up to ~16300 symbols, @@ -374,7 +374,7 @@ def parse_response(self, response: Union[Mapping[str, Any], List[dict]]) -> Iter 'message': 'This hapikey (....) does not have proper permissions! (requires any of [automation-access])', 'correlationId': '111111-2222-3333-4444-55555555555'} """ - logger.warn(f"Stream `{self.entity}` cannot be procced. {response.get('message')}") + logger.warning(f"Stream `{self.entity}` cannot be procced. 
{response.get('message')}") return if response.get(self.data_field) is None: diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index 6f6a2cf865ffe..6dd6ffb0c1dc5 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -5,8 +5,8 @@ from typing import Any, Callable, Iterator, Mapping, Optional, Tuple -from airbyte_protocol import AirbyteStream -from base_python import BaseClient +from airbyte_cdk.models import AirbyteStream +from airbyte_cdk.sources.deprecated.client import BaseClient from requests import HTTPError from source_hubspot.api import ( API, diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py index 660980307cfba..deed1d336c237 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/source.py @@ -3,7 +3,7 @@ # -from base_python import BaseSource +from airbyte_cdk.sources.deprecated.base_source import BaseSource from .client import Client diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py index 113bc557df64d..f55391f49fec0 100644 --- a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py @@ -36,14 +36,15 @@ def test_field_type_format_converting(field_type, expected): (1, {"type": ["null", "string"]}), ], ) -def test_bad_field_type_converting(field_type, expected, capsys): +def test_bad_field_type_converting(field_type, expected, caplog, capsys): assert Stream._get_field_props(field_type=field_type) == expected - logs = capsys.readouterr().out + logs = caplog.records - assert '"WARN"' in logs - assert f"Unsupported type {field_type} found" in logs + assert logs + assert logs[0].levelname == "WARNING" + assert logs[0].msg == f"Unsupported type {field_type} found" @pytest.mark.parametrize( diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 2836f6b08ed00..35b64b637c8b0 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -96,6 +96,7 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | | 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | | 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length | | 0.1.19 | 2021-10-26 | [6954](https://github.com/airbytehq/airbyte/pull/6954) | Fix issue with getting `414` HTTP error for streams | From f2ab450a8dc85652f2c588507b920f879b798f0d Mon Sep 17 00:00:00 2001 From: midavadim Date: Wed, 3 Nov 2021 12:38:56 +0200 Subject: [PATCH 23/83] :bug: source mixpanel: hardcoded 'standard' properties for engage stream 
(#7505) * Hardcoded 'standard' properties for engage stream to avoid normalization error (particularly for 'browser_version') * updated change log message * bumped connector version, updated change log * fix table in docs --- .../12928b32-bf0a-4f1e-964f-07e12e37153a.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-mixpanel/Dockerfile | 2 +- .../source_mixpanel/schemas/engage.json | 40 +++++++++++++++++++ .../source-mixpanel/source_mixpanel/source.py | 4 +- docs/integrations/sources/mixpanel.md | 1 + 6 files changed, 47 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json index addc1ddf76170..69ac7cf32c70a 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/12928b32-bf0a-4f1e-964f-07e12e37153a.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "12928b32-bf0a-4f1e-964f-07e12e37153a", "name": "Mixpanel", "dockerRepository": "airbyte/source-mixpanel", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mixpanel", "icon": "mixpanel.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 27c53deb2b148..a818aa66bca7a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -334,7 +334,7 @@ - name: Mixpanel sourceDefinitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a dockerRepository: airbyte/source-mixpanel - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/mixpanel icon: mixpanel.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-mixpanel/Dockerfile b/airbyte-integrations/connectors/source-mixpanel/Dockerfile index 6985afaf20690..3f193290b7c53 100644 --- a/airbyte-integrations/connectors/source-mixpanel/Dockerfile +++ b/airbyte-integrations/connectors/source-mixpanel/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-mixpanel diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json index b31b1a29826a7..b530afc24fb93 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/schemas/engage.json @@ -5,6 +5,46 @@ "properties": { "distinct_id": { "type": ["null", "string"] + }, + "browser": { + "type": ["null", "string"] + }, + "browser_version": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "country_code": { + "type": ["null", "string"] + }, + "region": { + "type": ["null", "string"] + }, + "timezone": { + "type": ["null", "string"] + }, + "last_seen": { + "type": ["null", "string"], + "format": "date-time" + }, + "email": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "first_name": { + "type": ["null", "string"] + }, + "last_name": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "string"] + }, + "unblocked": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py index 7c3916d342cbf..b0e88d8bed44e 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/source.py @@ -515,7 +515,9 @@ def get_json_schema(self) -> Mapping[str, Any]: # from API: '$browser' # to stream: 'browser' property_name = property_name[1:] - schema["properties"][property_name] = types.get(property_type, {"type": ["null", "string"]}) + # Do not overwrite 'standard' hard-coded properties, add 'custom' properties + if property_name not in schema["properties"]: + schema["properties"][property_name] = types.get(property_type, {"type": ["null", "string"]}) return schema diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index 50713de707e0c..e8b1ce633efe1 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -56,6 +56,7 @@ Select the correct region \(EU or US\) for your Mixpanel project. 
See detail [he | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.3` | 2021-10-30 | [7505](https://github.com/airbytehq/airbyte/issues/7505) | Guarantee that standard and custom mixpanel properties in the `Engage` stream are written as strings | | `0.1.2` | 2021-11-02 | [7439](https://github.com/airbytehq/airbyte/issues/7439) | Added delay for all streams to match API limitation of requests rate | | `0.1.1` | 2021-09-16 | [6075](https://github.com/airbytehq/airbyte/issues/6075) | Added option to select project region | | `0.1.0` | 2021-07-06 | [3698](https://github.com/airbytehq/airbyte/issues/3698) | created CDK native mixpanel connector | From a915034954ac9cb8dd2d615b416b48029b138133 Mon Sep 17 00:00:00 2001 From: Alexander Tsukanov Date: Wed, 3 Nov 2021 13:29:45 +0200 Subject: [PATCH 24/83] =?UTF-8?q?=F0=9F=90=9B=20Destination=20BigQuery-den?= =?UTF-8?q?ormalized:=20Added=20conversion=20from=20JSON=20Datetime=20to?= =?UTF-8?q?=20BigQuery=20format=20(#7413)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../079d5540-f236-4294-ba7c-ade8fd918496.json | 2 +- .../seed/destination_definitions.yaml | 2 +- .../BOOTSTRAP.md | 5 + .../Dockerfile | 2 +- .../BigQueryDenormalizedRecordConsumer.java | 13 +- .../BigQueryDenormalizedDestinationTest.java | 200 +++------------- .../BigQueryDenormalizedTestDataUtils.java | 224 ++++++++++++++++++ .../destination-bigquery/BOOTSTRAP.md | 8 + .../destination/bigquery/BigQueryUtils.java | 46 ++++ docs/integrations/destinations/bigquery.md | 1 + 10 files changed, 327 insertions(+), 176 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md create mode 100644 airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java create mode 100644 airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json index ea1fd8ef6ef9b..2a948beb7b301 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json @@ -2,6 +2,6 @@ "destinationDefinitionId": "079d5540-f236-4294-ba7c-ade8fd918496", "name": "BigQuery (denormalized typed struct)", "dockerRepository": "airbyte/destination-bigquery-denormalized", - "dockerImageTag": "0.1.6", + "dockerImageTag": "0.1.8", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 1d899ea73b42f..69f4711d2270c 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -11,7 +11,7 @@ - name: BigQuery (denormalized typed struct) destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496 dockerRepository: airbyte/destination-bigquery-denormalized - dockerImageTag: 0.1.7 + dockerImageTag: 0.1.8 documentationUrl: 
https://docs.airbyte.io/integrations/destinations/bigquery - name: Chargify (Keen) destinationDefinitionId: 81740ce8-d764-4ea7-94df-16bb41de36ae diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md b/airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md new file mode 100644 index 0000000000000..edb26b327d2a6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/BOOTSTRAP.md @@ -0,0 +1,5 @@ +# BigQuery Denormalized Destination Connector Bootstrap + +Instead of splitting the final data into multiple tables, this destination leverages BigQuery capabilities with [Structured and Repeated fields](https://cloud.google.com/bigquery/docs/nested-repeated) to produce a single "big" table per stream. This does not write the `_airbyte_raw_*` tables in the destination and normalization from this connector is not supported at this time. + +See [this](https://docs.airbyte.io/integrations/destinations/databricks) link for the nuances about the connector. \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index 2ad0b213627c2..ec6426734c09c 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.7 +LABEL io.airbyte.version=0.1.8 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java index d52da1ffe77bb..9048dab2a3b65 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java @@ -40,10 +40,10 @@ public class BigQueryDenormalizedRecordConsumer extends BigQueryRecordConsumer { private final Set invalidKeys; public BigQueryDenormalizedRecordConsumer(final BigQuery bigquery, - final Map writeConfigs, - final ConfiguredAirbyteCatalog catalog, - final Consumer outputRecordCollector, - final StandardNameTransformer namingResolver) { + final Map writeConfigs, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector, + final StandardNameTransformer namingResolver) { super(bigquery, writeConfigs, catalog, outputRecordCollector, false, false); this.namingResolver = namingResolver; invalidKeys = new HashSet<>(); @@ -59,6 +59,7 @@ protected JsonNode formatRecord(final Schema schema, final AirbyteRecordMessage final ObjectNode data = (ObjectNode) formatData(schema.getFields(), recordMessage.getData()); data.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString()); data.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, formattedEmittedAt); + return data; } @@ -67,6 +68,10 @@ protected JsonNode formatData(final FieldList fields, final JsonNode root) 
{ if (fields == null) { return root; } + List dateTimeFields = BigQueryUtils.getDateTimeFieldsFromSchema(fields); + if (!dateTimeFields.isEmpty()) { + BigQueryUtils.transformJsonDateTimeToBigDataFormat(dateTimeFields, (ObjectNode) root); + } if (root.isObject()) { final List fieldNames = fields.stream().map(Field::getName).collect(Collectors.toList()); return Jsons.jsonNode(Jsons.keys(root).stream() diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java index aa9026098504a..aa13e9fb02c49 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java @@ -4,6 +4,7 @@ package io.airbyte.integrations.destination.bigquery; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.params.provider.Arguments.arguments; @@ -42,6 +43,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.joda.time.DateTime; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -73,6 +75,10 @@ class BigQueryDenormalizedDestinationTest { .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) .withData(getDataWithFormats()) .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_USERS4 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) + .withData(getDataWithJSONDateTimeFormats()) + .withEmittedAt(NOW.toEpochMilli())); private JsonNode config; @@ -109,6 +115,7 @@ void setup(final TestInfo info) throws IOException { MESSAGE_USERS1.getRecord().setNamespace(datasetId); MESSAGE_USERS2.getRecord().setNamespace(datasetId); MESSAGE_USERS3.getRecord().setNamespace(datasetId); + MESSAGE_USERS4.getRecord().setNamespace(datasetId); final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).setLocation(datasetLocation).build(); dataset = bigquery.create(datasetInfo); @@ -199,7 +206,7 @@ void testWriteWithFormat() throws Exception { // Bigquery's datetime type accepts multiple input format but always outputs the same, so we can't // expect to receive the value we sent. 
- assertEquals(extractJsonValues(resultJson, "updated_at"), Set.of("2018-08-19T12:11:35.220")); + assertEquals(extractJsonValues(resultJson, "updated_at"), Set.of("2021-10-11T06:36:53")); final Schema expectedSchema = Schema.of( Field.of("name", StandardSQLTypeName.STRING), @@ -211,6 +218,29 @@ void testWriteWithFormat() throws Exception { assertEquals(BigQueryUtils.getTableDefinition(bigquery, dataset.getDatasetId().getDataset(), USERS_STREAM_NAME).getSchema(), expectedSchema); } + @Test + void testIfJSONDateTimeWasConvertedToBigQueryFormat() throws Exception { + catalog = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(new ConfiguredAirbyteStream() + .withStream(new AirbyteStream().withName(USERS_STREAM_NAME).withNamespace(datasetId).withJsonSchema(getSchemaWithDateTime())) + .withSyncMode(SyncMode.FULL_REFRESH).withDestinationSyncMode(DestinationSyncMode.OVERWRITE))); + + final BigQueryDestination destination = new BigQueryDenormalizedDestination(); + final AirbyteMessageConsumer consumer = destination.getConsumer(config, catalog, Destination::defaultOutputRecordCollector); + + consumer.accept(MESSAGE_USERS4); + consumer.close(); + + final List usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME); + assertEquals(usersActual.size(), 1); + final JsonNode resultJson = usersActual.get(0); + + // BigQuery Accepts "YYYY-MM-DD HH:MM:SS[.SSSSSS]" format + // returns "yyyy-MM-dd'T'HH:mm:ss" format + assertEquals(Set.of(new DateTime("2021-10-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), extractJsonValues(resultJson, "updated_at")); + //check nested datetime + assertEquals(Set.of(new DateTime("2021-11-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), extractJsonValues(resultJson.get("items"), "nested_datetime")); + } + private Set extractJsonValues(final JsonNode node, final String attributeName) { final List valuesNode = node.findValues(attributeName); final Set resultSet = new HashSet<>(); @@ -233,7 +263,6 @@ private List retrieveRecordsAsJson(final String tableName) throws Exce .newBuilder( String.format("select TO_JSON_STRING(t) as jsonValue from %s.%s t;", dataset.getDatasetId().getDataset(), tableName.toLowerCase())) .setUseLegacySql(false).build(); - BigQueryUtils.executeQuery(bigquery, queryConfig); return StreamSupport @@ -249,171 +278,4 @@ private static Stream schemaAndDataProvider() { arguments(getSchemaWithInvalidArrayType(), MESSAGE_USERS1), arguments(getSchema(), MESSAGE_USERS2)); } - - private static JsonNode getSchema() { - return Jsons.deserialize( - "{\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"permissions\": {\n" - + " \"type\": [\n" - + " \"array\"\n" - + " ],\n" - + " \"items\": {\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"domain\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"grants\": {\n" - + " \"type\": [\n" - + " \"array\"\n" - + " ],\n" - + " \"items\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + "}"); - - } - - private static JsonNode getSchemaWithFormats() { - return Jsons.deserialize( - "{\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"date_of_birth\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ],\n" - + " 
\"format\": \"date\"\n" - + " },\n" - + " \"updated_at\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ],\n" - + " \"format\": \"date-time\"\n" - + " }\n" - + " }\n" - + "}"); - } - - private static JsonNode getSchemaWithInvalidArrayType() { - return Jsons.deserialize( - "{\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"permissions\": {\n" - + " \"type\": [\n" - + " \"array\"\n" - + " ],\n" - + " \"items\": {\n" - + " \"type\": [\n" - + " \"object\"\n" - + " ],\n" - + " \"properties\": {\n" - + " \"domain\": {\n" - + " \"type\": [\n" - + " \"string\"\n" - + " ]\n" - + " },\n" - + " \"grants\": {\n" - + " \"type\": [\n" - + " \"array\"\n" // missed "items" element - + " ]\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + "}"); - - } - - private static JsonNode getData() { - return Jsons.deserialize( - "{\n" - + " \"name\": \"Andrii\",\n" - + " \"permissions\": [\n" - + " {\n" - + " \"domain\": \"abs\",\n" - + " \"grants\": [\n" - + " \"admin\"\n" - + " ]\n" - + " },\n" - + " {\n" - + " \"domain\": \"tools\",\n" - + " \"grants\": [\n" - + " \"read\", \"write\"\n" - + " ]\n" - + " }\n" - + " ]\n" - + "}"); - } - - private static JsonNode getDataWithFormats() { - return Jsons.deserialize( - "{\n" - + " \"name\": \"Andrii\",\n" - + " \"date_of_birth\": \"1996-01-25\",\n" - + " \"updated_at\": \"2018-08-19 12:11:35.22\"\n" - + "}"); - } - - private static JsonNode getDataWithEmptyObjectAndArray() { - return Jsons.deserialize( - "{\n" - + " \"name\": \"Andrii\",\n" - + " \"permissions\": [\n" - + " {\n" - + " \"domain\": \"abs\",\n" - + " \"items\": {},\n" // empty object - + " \"grants\": [\n" - + " \"admin\"\n" - + " ]\n" - + " },\n" - + " {\n" - + " \"domain\": \"tools\",\n" - + " \"grants\": [],\n" // empty array - + " \"items\": {\n" // object with empty array and object - + " \"object\": {},\n" - + " \"array\": []\n" - + " }\n" - + " }\n" - + " ]\n" - + "}"); - - } - } diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java new file mode 100644 index 0000000000000..c2fa24cdec102 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java @@ -0,0 +1,224 @@ +package io.airbyte.integrations.destination.bigquery.util; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; + +public class BigQueryDenormalizedTestDataUtils { + + public static JsonNode getSchema() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"accepts_marketing_updated_at\": {\n" + + " \"type\": [\n" + + " \"null\",\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " },\n" + + " \"name\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"permissions\": {\n" + + " \"type\": [\n" + + " \"array\"\n" + + " ],\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"domain\": {\n" + + " \"type\": [\n" + + " \"string\"\n" 
+ + " ]\n" + + " },\n" + + " \"grants\": {\n" + + " \"type\": [\n" + + " \"array\"\n" + + " ],\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}"); + + } + + public static JsonNode getSchemaWithFormats() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"date_of_birth\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date\"\n" + + " },\n" + + " \"updated_at\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " }\n" + + " }\n" + + "}"); + } + + public static JsonNode getSchemaWithDateTime() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " " + + + "\"updated_at\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " },\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"nested_datetime\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ],\n" + + " \"format\": \"date-time\"\n" + + " }\n" + + + " " + + "}\n" + + " }\n" + + " }\n" + + "}"); + } + + public static JsonNode getSchemaWithInvalidArrayType() { + return Jsons.deserialize( + "{\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"permissions\": {\n" + + " \"type\": [\n" + + " \"array\"\n" + + " ],\n" + + " \"items\": {\n" + + " \"type\": [\n" + + " \"object\"\n" + + " ],\n" + + " \"properties\": {\n" + + " \"domain\": {\n" + + " \"type\": [\n" + + " \"string\"\n" + + " ]\n" + + " },\n" + + " \"grants\": {\n" + + " \"type\": [\n" + + " \"array\"\n" // missed "items" element + + " ]\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}"); + + } + + public static JsonNode getData() { + return Jsons.deserialize( + "{\n" + + " \"name\": \"Andrii\",\n" + + " \"accepts_marketing_updated_at\": \"2021-10-11T06:36:53-07:00\",\n" + + " \"permissions\": [\n" + + " {\n" + + " \"domain\": \"abs\",\n" + + " \"grants\": [\n" + + " \"admin\"\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"domain\": \"tools\",\n" + + " \"grants\": [\n" + + " \"read\", \"write\"\n" + + " ]\n" + + " }\n" + + " ]\n" + + "}"); + } + + public static JsonNode getDataWithFormats() { + return Jsons.deserialize( + "{\n" + + " \"name\": \"Andrii\",\n" + + " \"date_of_birth\": \"1996-01-25\",\n" + + " \"updated_at\": \"2021-10-11T06:36:53\"\n" + + "}"); + } + + public static JsonNode getDataWithJSONDateTimeFormats() { + return Jsons.deserialize( + "{\n" + + " \"updated_at\": \"2021-10-11T06:36:53+00:00\",\n" + + " \"items\": {\n" + + " \"nested_datetime\": \"2021-11-11T06:36:53+00:00\"\n" + + " }\n" + + "}"); + } + + public static JsonNode getDataWithEmptyObjectAndArray() { + return Jsons.deserialize( + "{\n" + + " \"name\": \"Andrii\",\n" + + " \"permissions\": [\n" + + " {\n" + + " \"domain\": \"abs\",\n" + + " \"items\": {},\n" // empty object + + " \"grants\": [\n" + + " \"admin\"\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"domain\": \"tools\",\n" + + " \"grants\": [],\n" // empty array + + " \"items\": {\n" // object with empty array and object + + " \"object\": {},\n" + + " \"array\": []\n" + + " }\n" + + " }\n" + + " ]\n" + + 
"}"); + } +} diff --git a/airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md b/airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md new file mode 100644 index 0000000000000..9a5d31b122345 --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/BOOTSTRAP.md @@ -0,0 +1,8 @@ +# BigQuery Destination Connector Bootstrap + +BigQuery is a serverless, highly scalable, and cost-effective data warehouse +offered by Google Cloud Provider. + +BigQuery connector is producing the standard Airbyte outputs using a `_airbyte_raw_*` tables storing the JSON blob data first. Afterward, these are transformed and normalized into separate tables, potentially "exploding" nested streams into their own tables if [basic normalization](https://docs.airbyte.io/understanding-airbyte/basic-normalization) is configured. + +See [this](https://docs.airbyte.io/integrations/destinations/bigquery) link for more information about the connector. diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index d5fc8a397cb07..613ec652407ff 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -5,16 +5,21 @@ package io.airbyte.integrations.destination.bigquery; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; import com.google.cloud.bigquery.Clustering; import com.google.cloud.bigquery.Dataset; import com.google.cloud.bigquery.DatasetInfo; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.FieldList; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobId; import com.google.cloud.bigquery.JobInfo; import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.Schema; +import com.google.cloud.bigquery.StandardSQLTypeName; import com.google.cloud.bigquery.StandardTableDefinition; import com.google.cloud.bigquery.TableDefinition; import com.google.cloud.bigquery.TableId; @@ -24,15 +29,20 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.util.ArrayList; +import java.util.List; import java.util.Set; import java.util.UUID; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class BigQueryUtils { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUtils.class); + private static final String BIG_QUERY_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSSSS"; static ImmutablePair executeQuery(final BigQuery bigquery, final QueryJobConfiguration queryConfig) { final JobId jobId = JobId.of(UUID.randomUUID().toString()); @@ -143,4 +153,40 @@ static TableDefinition getTableDefinition(final BigQuery bigquery, final String return bigquery.getTable(tableId).getDefinition(); } + /** + * @param fieldList - the list to be checked + * 
@return The list of field names with DATETIME format. + * + */ + public static List<String> getDateTimeFieldsFromSchema(FieldList fieldList) { + List<String> dateTimeFields = new ArrayList<>(); + for (Field field : fieldList) { + if (field.getType().getStandardType().equals(StandardSQLTypeName.DATETIME)) { + dateTimeFields.add(field.getName()); + } + } + return dateTimeFields; + } + + /** + * @param dateTimeFields - the list of field names with DATETIME format + * @param data - the JSON record that will be sent to the BigQuery service + * + * BigQuery requires a specific DATETIME format when values of this type are saved. + * @see <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types">Supported Google BigQuery data types</a> + * This method adapts JSON DATETIME values to the format BigQuery expects. + */ + public static void transformJsonDateTimeToBigDataFormat(List<String> dateTimeFields, ObjectNode data) { + dateTimeFields.forEach(e -> { + if (data.findValue(e) != null && !data.get(e).isNull()) { + String googleBigQueryDateFormat = QueryParameterValue + .dateTime(new DateTime(data + .findValue(e) + .asText()) + .toString(BIG_QUERY_DATETIME_FORMAT)) + .getValue(); + data.put(e, googleBigQueryDateFormat); + } + }); + } } diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index 3b691446003a4..ede3b62ab2336 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -169,6 +169,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.8 | 2021-10-27 | [\#7413](https://github.com/airbytehq/airbyte/issues/7413) | Fixed DATETIME conversion for BigQuery | | 0.1.7 | 2021-10-26 | [\#7240](https://github.com/airbytehq/airbyte/issues/7240) | Output partitioned/clustered tables | | 0.1.6 | 2021-09-16 | [\#6145](https://github.com/airbytehq/airbyte/pull/6145) | BigQuery Denormalized support for date, datetime & timestamp types through the json "format" key | | 0.1.5 | 2021-09-07 | [\#5881](https://github.com/airbytehq/airbyte/pull/5881) | BigQuery Denormalized NPE fix | From 0c2fcb8749e8ba00e133bfbca507e94161cd731d Mon Sep 17 00:00:00 2001 From: Dmytro Date: Wed, 3 Nov 2021 14:50:30 +0200 Subject: [PATCH 25/83] Fix iterable memory consumption (#7591) --- .../2e875208-0c0b-4ee4-9e92-1cb3156ea799.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-iterable/Dockerfile | 2 +- .../connectors/source-iterable/setup.py | 7 +++- .../source-iterable/source_iterable/api.py | 17 +++++++- .../source-iterable/unit_tests/__init__.py | 3 ++ .../unit_tests/test_exports_stream.py | 42 +++++++++++++++++++ docs/integrations/sources/iterable.md | 1 + 8 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json index 00fe3f7d0fba4..6cb4f103e6d2f 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/2e875208-0c0b-4ee4-9e92-1cb3156ea799.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "2e875208-0c0b-4ee4-9e92-1cb3156ea799", "name": "Iterable",
"dockerRepository": "airbyte/source-iterable", - "dockerImageTag": "0.1.9", + "dockerImageTag": "0.1.10", "documentationUrl": "https://docs.airbyte.io/integrations/sources/iterable" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index a818aa66bca7a..1ca547738bf99 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -262,7 +262,7 @@ - name: Iterable sourceDefinitionId: 2e875208-0c0b-4ee4-9e92-1cb3156ea799 dockerRepository: airbyte/source-iterable - dockerImageTag: 0.1.9 + dockerImageTag: 0.1.10 documentationUrl: https://docs.airbyte.io/integrations/sources/iterable sourceType: api - name: Jira diff --git a/airbyte-integrations/connectors/source-iterable/Dockerfile b/airbyte-integrations/connectors/source-iterable/Dockerfile index 85e3f9b3f58c8..39127abee7bcb 100644 --- a/airbyte-integrations/connectors/source-iterable/Dockerfile +++ b/airbyte-integrations/connectors/source-iterable/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.9 +LABEL io.airbyte.version=0.1.10 LABEL io.airbyte.name=airbyte/source-iterable diff --git a/airbyte-integrations/connectors/source-iterable/setup.py b/airbyte-integrations/connectors/source-iterable/setup.py index 90f33d5300e34..893e468fb733b 100644 --- a/airbyte-integrations/connectors/source-iterable/setup.py +++ b/airbyte-integrations/connectors/source-iterable/setup.py @@ -11,7 +11,7 @@ "requests~=2.25", ] -TEST_REQUIREMENTS = ["pytest~=6.1"] +TEST_REQUIREMENTS = ["pytest~=6.1", "responses==0.13.3"] setup( @@ -20,6 +20,9 @@ author="Airbyte", author_email="contact@airbyte.io", packages=find_packages(), - install_requires=MAIN_REQUIREMENTS + TEST_REQUIREMENTS, + install_requires=MAIN_REQUIREMENTS, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, package_data={"": ["*.json", "schemas/*.json"]}, ) diff --git a/airbyte-integrations/connectors/source-iterable/source_iterable/api.py b/airbyte-integrations/connectors/source-iterable/source_iterable/api.py index ad8288de5f39b..82a7074f42f2a 100755 --- a/airbyte-integrations/connectors/source-iterable/source_iterable/api.py +++ b/airbyte-integrations/connectors/source-iterable/source_iterable/api.py @@ -69,7 +69,7 @@ def __init__(self, start_date, **kwargs): self.stream_params = {"dataTypeName": self.data_field} def path(self, **kwargs) -> str: - return "/export/data.json" + return "export/data.json" @staticmethod def _field_to_datetime(value: Union[int, str]) -> pendulum.datetime: @@ -114,6 +114,21 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp record[self.cursor_field] = self._field_to_datetime(record[self.cursor_field]) yield record + def request_kwargs( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Mapping[str, Any]: + """ + https://api.iterable.com/api/docs#export_exportDataJson + Export requests of this kind can return a large payload of JSON objects + separated by newline characters. + Passing stream=True to requests.Session.send avoids loading the whole + export response into memory.
+ """ + return {"stream": True} + class Lists(IterableStream): data_field = "lists" diff --git a/airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py b/airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-iterable/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py b/airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py new file mode 100644 index 0000000000000..4f151b6155156 --- /dev/null +++ b/airbyte-integrations/connectors/source-iterable/unit_tests/test_exports_stream.py @@ -0,0 +1,42 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import json +from unittest import mock + +import pytest +import responses +from airbyte_cdk.models import SyncMode +from source_iterable.api import EmailSend + + +@pytest.fixture +def session_mock(): + with mock.patch("airbyte_cdk.sources.streams.http.http.requests") as requests_mock: + session_mock = mock.MagicMock() + response_mock = mock.MagicMock() + requests_mock.Session.return_value = session_mock + session_mock.send.return_value = response_mock + response_mock.status_code = 200 + yield session_mock + + +def test_send_email_stream(session_mock): + stream = EmailSend(start_date="2020", api_key="") + _ = list(stream.read_records(sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=[], stream_state={})) + + assert session_mock.send.called + send_args = session_mock.send.call_args[1] + assert send_args.get("stream") is True + + +@responses.activate +def test_stream_correct(): + record_js = {"createdAt": "2020"} + NUMBER_OF_RECORDS = 10 ** 2 + resp_body = "\n".join([json.dumps(record_js)] * NUMBER_OF_RECORDS) + responses.add("GET", "https://api.iterable.com/api/export/data.json", body=resp_body) + stream = EmailSend(start_date="2020", api_key="") + records = list(stream.read_records(sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=[], stream_state={})) + assert len(records) == NUMBER_OF_RECORDS diff --git a/docs/integrations/sources/iterable.md b/docs/integrations/sources/iterable.md index b949e92cc722f..8310d6cf75f52 100644 --- a/docs/integrations/sources/iterable.md +++ b/docs/integrations/sources/iterable.md @@ -58,6 +58,7 @@ Please read [How to find your API key](https://support.iterable.com/hc/en-us/art | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| `0.1.10` | 2021-11-03 | [7591](https://github.com/airbytehq/airbyte/pull/7591) | Optimize export streams memory consumption for large requests | | `0.1.9` | 2021-10-06 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Enable campaign_metrics stream | | `0.1.8` | 2021-09-20 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Add new streams: campaign_metrics, events | | `0.1.7` | 2021-09-20 | [6242](https://github.com/airbytehq/airbyte/pull/6242) | Updated schema for: campaigns, lists, templates, metadata | From b94ee00fd8e5220e3191073cd711a57e052d7fda Mon Sep 17 00:00:00 2001 From: lmossman Date: Wed, 3 Nov 2021 08:46:43 -0700 Subject: [PATCH 26/83] Revert "Generate seed connector specs on build (#7501)" This reverts commit a534bb2a8f29b20e3cc7c52fef1bc3c34783695d. 
--- .../java/io/airbyte/config/init/SeedType.java | 4 +- .../resources/seed/destination_specs.yaml | 2752 -------- .../src/main/resources/seed/source_specs.yaml | 5836 ----------------- airbyte-config/models/build.gradle | 3 +- .../main/resources/types/DockerImageSpec.yaml | 16 - .../DatabaseConfigPersistence.java | 9 +- .../YamlSeedConfigPersistence.java | 40 +- .../YamlSeedConfigPersistenceTest.java | 9 +- airbyte-config/specs/README.md | 16 - airbyte-config/specs/build.gradle | 24 - .../config/specs/GcsBucketSpecFetcher.java | 70 - .../specs/SeedConnectorSpecGenerator.java | 127 - .../config/specs/SeedConnectorType.java | 33 - .../specs/GcsBucketSpecFetcherTest.java | 79 - .../specs/SeedConnectorSpecGeneratorTest.java | 154 - airbyte-json-validation/build.gradle | 2 - airbyte-protocol/models/build.gradle | 2 - airbyte-scheduler/client/build.gradle | 1 - .../BucketSpecCacheSchedulerClient.java | 58 +- .../BucketSpecCacheSchedulerClientTest.java | 14 +- airbyte-server/build.gradle | 1 - build.gradle | 3 +- settings.gradle | 1 - 23 files changed, 69 insertions(+), 9185 deletions(-) delete mode 100644 airbyte-config/init/src/main/resources/seed/destination_specs.yaml delete mode 100644 airbyte-config/init/src/main/resources/seed/source_specs.yaml delete mode 100644 airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml delete mode 100644 airbyte-config/specs/README.md delete mode 100644 airbyte-config/specs/build.gradle delete mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java delete mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java delete mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java delete mode 100644 airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java delete mode 100644 airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java diff --git a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java index 3730369621090..47c4c419bcf10 100644 --- a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java +++ b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java @@ -7,9 +7,7 @@ public enum SeedType { STANDARD_SOURCE_DEFINITION("/seed/source_definitions.yaml", "sourceDefinitionId"), - STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"), - SOURCE_SPEC("/seed/source_specs.yaml", "dockerImage"), - DESTINATION_SPEC("/seed/destination_specs.yaml", "dockerImage"); + STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"); final String resourcePath; // ID field name diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml deleted file mode 100644 index a248b4eff240c..0000000000000 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ /dev/null @@ -1,2752 +0,0 @@ -# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. -# Do NOT edit this file directly. See generator class for more details. 
---- -- dockerImage: "airbyte/destination-azure-blob-storage:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "AzureBlobStorage Destination Spec" - type: "object" - required: - - "azure_blob_storage_account_name" - - "azure_blob_storage_account_key" - - "format" - additionalProperties: false - properties: - azure_blob_storage_endpoint_domain_name: - title: "Endpoint Domain Name" - type: "string" - default: "blob.core.windows.net" - description: "This is Azure Blob Storage endpoint domain name. Leave default\ - \ value (or leave it empty if run container from command line) to use\ - \ Microsoft native from example." - examples: - - "blob.core.windows.net" - azure_blob_storage_container_name: - title: "Azure blob storage container (Bucket) Name" - type: "string" - description: "The name of the Azure blob storage container. If not exists\ - \ - will be created automatically. May be empty, then will be created\ - \ automatically airbytecontainer+timestamp" - examples: - - "airbytetescontainername" - azure_blob_storage_account_name: - title: "Azure Blob Storage account name" - type: "string" - description: "The account's name of the Azure Blob Storage." - examples: - - "airbyte5storage" - azure_blob_storage_account_key: - description: "The Azure blob storage account key." - airbyte_secret: true - type: "string" - examples: - - "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd==" - format: - title: "Output Format" - type: "object" - description: "Output data format" - oneOf: - - title: "CSV: Comma-Separated Values" - required: - - "format_type" - - "flattening" - properties: - format_type: - type: "string" - const: "CSV" - flattening: - type: "string" - title: "Normalization (Flattening)" - description: "Whether the input json data should be normalized (flattened)\ - \ in the output CSV. Please refer to docs for details." - default: "No flattening" - enum: - - "No flattening" - - "Root level flattening" - - title: "JSON Lines: newline-delimited JSON" - required: - - "format_type" - properties: - format_type: - type: "string" - const: "JSONL" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-bigquery:0.5.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "BigQuery Destination Spec" - type: "object" - required: - - "project_id" - - "dataset_id" - additionalProperties: true - properties: - big_query_client_buffer_size_mb: - title: "Google BigQuery client chunk size" - description: "Google BigQuery client's chunk(buffer) size (MIN=1, MAX =\ - \ 15) for each table. The default 15MiB value is used if not set explicitly.\ - \ It's recommended to decrease value for big data sets migration for less\ - \ HEAP memory consumption and avoiding crashes. For more details refer\ - \ to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html" - type: "integer" - minimum: 1 - maximum: 15 - default: 15 - examples: - - "15" - project_id: - type: "string" - description: "The GCP project ID for the project containing the target BigQuery\ - \ dataset." 
- title: "Project ID" - dataset_id: - type: "string" - description: "Default BigQuery Dataset ID tables are replicated to if the\ - \ source does not specify a namespace." - title: "Default Dataset ID" - dataset_location: - type: "string" - description: "The location of the dataset. Warning: Changes made after creation\ - \ will not be applied." - title: "Dataset Location" - default: "US" - enum: - - "US" - - "EU" - - "asia-east1" - - "asia-east2" - - "asia-northeast1" - - "asia-northeast2" - - "asia-northeast3" - - "asia-south1" - - "asia-southeast1" - - "asia-southeast2" - - "australia-southeast1" - - "europe-central1" - - "europe-central2" - - "europe-north1" - - "europe-west1" - - "europe-west2" - - "europe-west3" - - "europe-west4" - - "europe-west5" - - "europe-west6" - - "northamerica-northeast1" - - "southamerica-east1" - - "us-central1" - - "us-east1" - - "us-east4" - - "us-west-1" - - "us-west-2" - - "us-west-3" - - "us-west-4" - credentials_json: - type: "string" - description: "The contents of the JSON service account key. Check out the\ - \ docs if you need help generating this key. Default credentials will\ - \ be used if this field is left empty." - title: "Credentials JSON" - airbyte_secret: true - transformation_priority: - type: "string" - description: "When running custom transformations or Basic normalization,\ - \ running queries on interactive mode can hit BQ limits, choosing batch\ - \ will solve those limitss." - title: "Transformation Query Run Type" - default: "interactive" - enum: - - "interactive" - - "batch" - loading_method: - type: "object" - title: "Loading Method" - description: "Loading method used to send select the way data will be uploaded\ - \ to BigQuery." - oneOf: - - title: "Standard Inserts" - additionalProperties: false - description: "Direct uploading using streams." - required: - - "method" - properties: - method: - type: "string" - const: "Standard" - - title: "GCS Staging" - additionalProperties: false - description: "Writes large batches of records to a file, uploads the file\ - \ to GCS, then uses

COPY INTO table
to upload the file. Recommended\ - \ for large production workloads for better speed and scalability." - required: - - "method" - - "gcs_bucket_name" - - "gcs_bucket_path" - - "credential" - properties: - method: - type: "string" - const: "GCS Staging" - gcs_bucket_name: - title: "GCS Bucket Name" - type: "string" - description: "The name of the GCS bucket." - examples: - - "airbyte_sync" - gcs_bucket_path: - description: "Directory under the GCS bucket where data will be written." - type: "string" - examples: - - "data_sync/test" - keep_files_in_gcs-bucket: - type: "string" - description: "This upload method is supposed to temporary store records\ - \ in GCS bucket. What do you want to do with data in GCS bucket\ - \ when migration has finished?" - title: "GCS tmp files afterward processing" - default: "Delete all tmp files from GCS" - enum: - - "Delete all tmp files from GCS" - - "Keep all tmp files in GCS" - credential: - title: "Credential" - type: "object" - oneOf: - - title: "HMAC key" - required: - - "credential_type" - - "hmac_key_access_id" - - "hmac_key_secret" - properties: - credential_type: - type: "string" - const: "HMAC_KEY" - hmac_key_access_id: - type: "string" - description: "HMAC key access ID. When linked to a service account,\ - \ this ID is 61 characters long; when linked to a user account,\ - \ it is 24 characters long." - title: "HMAC Key Access ID" - airbyte_secret: true - examples: - - "1234567890abcdefghij1234" - hmac_key_secret: - type: "string" - description: "The corresponding secret for the access ID. It\ - \ is a 40-character base-64 encoded string." - title: "HMAC Key Secret" - airbyte_secret: true - examples: - - "1234567890abcdefghij1234567890ABCDEFGHIJ" - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/destination-bigquery-denormalized:0.1.7" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "BigQuery Denormalized Typed Struct Destination Spec" - type: "object" - required: - - "project_id" - - "dataset_id" - additionalProperties: true - properties: - project_id: - type: "string" - description: "The GCP project ID for the project containing the target BigQuery\ - \ dataset." - title: "Project ID" - dataset_id: - type: "string" - description: "Default BigQuery Dataset ID tables are replicated to if the\ - \ source does not specify a namespace." - title: "Default Dataset ID" - dataset_location: - type: "string" - description: "The location of the dataset. Warning: Changes made after creation\ - \ will not be applied." - title: "Dataset Location" - default: "US" - enum: - - "US" - - "EU" - - "asia-east1" - - "asia-east2" - - "asia-northeast1" - - "asia-northeast2" - - "asia-northeast3" - - "asia-south1" - - "asia-southeast1" - - "asia-southeast2" - - "australia-southeast1" - - "europe-central1" - - "europe-central2" - - "europe-north1" - - "europe-west1" - - "europe-west2" - - "europe-west3" - - "europe-west4" - - "europe-west5" - - "europe-west6" - - "northamerica-northeast1" - - "southamerica-east1" - - "us-central1" - - "us-east1" - - "us-east4" - - "us-west-1" - - "us-west-2" - - "us-west-3" - - "us-west-4" - credentials_json: - type: "string" - description: "The contents of the JSON service account key. Check out the\ - \ docs if you need help generating this key. 
Default credentials will\ - \ be used if this field is left empty." - title: "Credentials JSON" - airbyte_secret: true - supportsIncremental: true - supportsNormalization: false - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-keen:0.2.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/keen" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Keen Spec" - type: "object" - required: - - "project_id" - - "api_key" - additionalProperties: false - properties: - project_id: - description: "Keen Project ID" - type: "string" - examples: - - "58b4acc22ba938934e888322e" - api_key: - title: "API Key" - description: "Keen Master API key" - type: "string" - examples: - - "ABCDEFGHIJKLMNOPRSTUWXYZ" - airbyte_secret: true - infer_timestamp: - title: "Infer Timestamp" - description: "Allow connector to guess keen.timestamp value based on the\ - \ streamed data" - type: "boolean" - default: true - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-dynamodb:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "DynamoDB Destination Spec" - type: "object" - required: - - "dynamodb_table_name" - - "dynamodb_region" - - "access_key_id" - - "secret_access_key" - additionalProperties: false - properties: - dynamodb_endpoint: - title: "Endpoint" - type: "string" - default: "" - description: "This is your DynamoDB endpoint url.(if you are working with\ - \ AWS DynamoDB, just leave empty)." - examples: - - "http://localhost:9000" - dynamodb_table_name: - title: "DynamoDB Table Name" - type: "string" - description: "The name of the DynamoDB table." - examples: - - "airbyte_sync" - dynamodb_region: - title: "DynamoDB Region" - type: "string" - default: "" - description: "The region of the DynamoDB." - enum: - - "" - - "us-east-1" - - "us-east-2" - - "us-west-1" - - "us-west-2" - - "af-south-1" - - "ap-east-1" - - "ap-south-1" - - "ap-northeast-1" - - "ap-northeast-2" - - "ap-northeast-3" - - "ap-southeast-1" - - "ap-southeast-2" - - "ca-central-1" - - "cn-north-1" - - "cn-northwest-1" - - "eu-central-1" - - "eu-north-1" - - "eu-south-1" - - "eu-west-1" - - "eu-west-2" - - "eu-west-3" - - "sa-east-1" - - "me-south-1" - - "us-gov-east-1" - - "us-gov-west-1" - access_key_id: - type: "string" - description: "The access key id to access the DynamoDB. Airbyte requires\ - \ Read and Write permissions to the DynamoDB." - title: "DynamoDB Key Id" - airbyte_secret: true - examples: - - "A012345678910EXAMPLE" - secret_access_key: - type: "string" - description: "The corresponding secret to the access key id." 
- title: "DynamoDB Access Key" - airbyte_secret: true - examples: - - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-gcs:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "GCS Destination Spec" - type: "object" - required: - - "gcs_bucket_name" - - "gcs_bucket_path" - - "gcs_bucket_region" - - "credential" - - "format" - additionalProperties: false - properties: - gcs_bucket_name: - title: "GCS Bucket Name" - type: "string" - description: "The name of the GCS bucket." - examples: - - "airbyte_sync" - gcs_bucket_path: - description: "Directory under the GCS bucket where data will be written." - type: "string" - examples: - - "data_sync/test" - gcs_bucket_region: - title: "GCS Bucket Region" - type: "string" - default: "" - description: "The region of the GCS bucket." - enum: - - "" - - "-- North America --" - - "northamerica-northeast1" - - "us-central1" - - "us-east1" - - "us-east4" - - "us-west1" - - "us-west2" - - "us-west3" - - "us-west4" - - "-- South America --" - - "southamerica-east1" - - "-- Europe --" - - "europe-central2" - - "europe-north1" - - "europe-west1" - - "europe-west2" - - "europe-west3" - - "europe-west4" - - "europe-west6" - - "-- Asia --" - - "asia-east1" - - "asia-east2" - - "asia-northeast1" - - "asia-northeast2" - - "asia-northeast3" - - "asia-south1" - - "asia-south2" - - "asia-southeast1" - - "asia-southeast2" - - "-- Australia --" - - "australia-southeast1" - - "australia-southeast2" - - "-- Multi-regions --" - - "asia" - - "eu" - - "us" - - "-- Dual-regions --" - - "asia1" - - "eur4" - - "nam4" - credential: - title: "Credential" - type: "object" - oneOf: - - title: "HMAC key" - required: - - "credential_type" - - "hmac_key_access_id" - - "hmac_key_secret" - properties: - credential_type: - type: "string" - enum: - - "HMAC_KEY" - default: "HMAC_KEY" - hmac_key_access_id: - type: "string" - description: "HMAC key access ID. When linked to a service account,\ - \ this ID is 61 characters long; when linked to a user account,\ - \ it is 24 characters long." - title: "HMAC Key Access ID" - airbyte_secret: true - examples: - - "1234567890abcdefghij1234" - hmac_key_secret: - type: "string" - description: "The corresponding secret for the access ID. It is a\ - \ 40-character base-64 encoded string." - title: "HMAC Key Secret" - airbyte_secret: true - examples: - - "1234567890abcdefghij1234567890ABCDEFGHIJ" - format: - title: "Output Format" - type: "object" - description: "Output data format" - oneOf: - - title: "Avro: Apache Avro" - required: - - "format_type" - - "compression_codec" - properties: - format_type: - type: "string" - enum: - - "Avro" - default: "Avro" - compression_codec: - title: "Compression Codec" - description: "The compression algorithm used to compress data. Default\ - \ to no compression." - type: "object" - oneOf: - - title: "no compression" - required: - - "codec" - properties: - codec: - type: "string" - enum: - - "no compression" - default: "no compression" - - title: "Deflate" - required: - - "codec" - - "compression_level" - properties: - codec: - type: "string" - enum: - - "Deflate" - default: "Deflate" - compression_level: - title: "Deflate level" - description: "0: no compression & fastest, 9: best compression\ - \ & slowest." 
- type: "integer" - default: 0 - minimum: 0 - maximum: 9 - - title: "bzip2" - required: - - "codec" - properties: - codec: - type: "string" - enum: - - "bzip2" - default: "bzip2" - - title: "xz" - required: - - "codec" - - "compression_level" - properties: - codec: - type: "string" - enum: - - "xz" - default: "xz" - compression_level: - title: "Compression level" - description: "See here for details." - type: "integer" - default: 6 - minimum: 0 - maximum: 9 - - title: "zstandard" - required: - - "codec" - - "compression_level" - properties: - codec: - type: "string" - enum: - - "zstandard" - default: "zstandard" - compression_level: - title: "Compression level" - description: "Negative levels are 'fast' modes akin to lz4 or\ - \ snappy, levels above 9 are generally for archival purposes,\ - \ and levels above 18 use a lot of memory." - type: "integer" - default: 3 - minimum: -5 - maximum: 22 - include_checksum: - title: "Include checksum" - description: "If true, include a checksum with each data block." - type: "boolean" - default: false - - title: "snappy" - required: - - "codec" - properties: - codec: - type: "string" - enum: - - "snappy" - default: "snappy" - part_size_mb: - title: "Block Size (MB) for GCS multipart upload" - description: "This is the size of a \"Part\" being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." - type: "integer" - default: 5 - examples: - - 5 - - title: "CSV: Comma-Separated Values" - required: - - "format_type" - - "flattening" - properties: - format_type: - type: "string" - enum: - - "CSV" - default: "CSV" - flattening: - type: "string" - title: "Normalization (Flattening)" - description: "Whether the input json data should be normalized (flattened)\ - \ in the output CSV. Please refer to docs for details." - default: "No flattening" - enum: - - "No flattening" - - "Root level flattening" - part_size_mb: - title: "Block Size (MB) for GCS multipart upload" - description: "This is the size of a \"Part\" being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." - type: "integer" - default: 5 - examples: - - 5 - - title: "JSON Lines: newline-delimited JSON" - required: - - "format_type" - properties: - format_type: - type: "string" - enum: - - "JSONL" - default: "JSONL" - part_size_mb: - title: "Block Size (MB) for GCS multipart upload" - description: "This is the size of a \"Part\" being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." - type: "integer" - default: 5 - examples: - - 5 - - title: "Parquet: Columnar Storage" - required: - - "format_type" - properties: - format_type: - type: "string" - enum: - - "Parquet" - default: "Parquet" - compression_codec: - title: "Compression Codec" - description: "The compression algorithm used to compress data pages." 
- type: "string" - enum: - - "UNCOMPRESSED" - - "SNAPPY" - - "GZIP" - - "LZO" - - "BROTLI" - - "LZ4" - - "ZSTD" - default: "UNCOMPRESSED" - block_size_mb: - title: "Block Size (Row Group Size) (MB)" - description: "This is the size of a row group being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will improve\ - \ the IO when reading, but consume more memory when writing. Default:\ - \ 128 MB." - type: "integer" - default: 128 - examples: - - 128 - max_padding_size_mb: - title: "Max Padding Size (MB)" - description: "Maximum size allowed as padding to align row groups.\ - \ This is also the minimum size of a row group. Default: 8 MB." - type: "integer" - default: 8 - examples: - - 8 - page_size_kb: - title: "Page Size (KB)" - description: "The page size is for compression. A block is composed\ - \ of pages. A page is the smallest unit that must be read fully\ - \ to access a single record. If this value is too small, the compression\ - \ will deteriorate. Default: 1024 KB." - type: "integer" - default: 1024 - examples: - - 1024 - dictionary_page_size_kb: - title: "Dictionary Page Size (KB)" - description: "There is one dictionary page per column per row group\ - \ when dictionary encoding is used. The dictionary page size works\ - \ like the page size but for dictionary. Default: 1024 KB." - type: "integer" - default: 1024 - examples: - - 1024 - dictionary_encoding: - title: "Dictionary Encoding" - description: "Default: true." - type: "boolean" - default: true - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" - $schema: "http://json-schema.org/draft-07/schema#" -- dockerImage: "airbyte/destination-pubsub:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/pubsub" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Google PubSub Destination Spec" - type: "object" - required: - - "project_id" - - "topic_id" - - "credentials_json" - additionalProperties: true - properties: - project_id: - type: "string" - description: "The GCP project ID for the project containing the target PubSub" - title: "Project ID" - topic_id: - type: "string" - description: "PubSub topic ID in the given GCP project ID" - title: "PubSub Topic ID" - credentials_json: - type: "string" - description: "The contents of the JSON service account key. Check out the\ - \ docs if you need help generating this key." 
- title: "Credentials JSON" - airbyte_secret: true - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" -- dockerImage: "airbyte/destination-kafka:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/kafka" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Kafka Destination Spec" - type: "object" - required: - - "bootstrap_servers" - - "topic_pattern" - - "protocol" - - "acks" - - "enable_idempotence" - - "compression_type" - - "batch_size" - - "linger_ms" - - "max_in_flight_requests_per_connection" - - "client_dns_lookup" - - "buffer_memory" - - "max_request_size" - - "retries" - - "socket_connection_setup_timeout_ms" - - "socket_connection_setup_timeout_max_ms" - - "max_block_ms" - - "request_timeout_ms" - - "delivery_timeout_ms" - - "send_buffer_bytes" - - "receive_buffer_bytes" - additionalProperties: true - properties: - bootstrap_servers: - title: "Bootstrap servers" - description: "A list of host/port pairs to use for establishing the initial\ - \ connection to the Kafka cluster. The client will make use of all servers\ - \ irrespective of which servers are specified here for bootstrapping—this\ - \ list only impacts the initial hosts used to discover the full set of\ - \ servers. This list should be in the form host1:port1,host2:port2,....\ - \ Since these servers are just used for the initial connection to discover\ - \ the full cluster membership (which may change dynamically), this list\ - \ need not contain the full set of servers (you may want more than one,\ - \ though, in case a server is down)." - type: "string" - examples: - - "kafka-broker1:9092,kafka-broker2:9092" - topic_pattern: - title: "Topic pattern" - description: "Topic pattern in which the records will be sent. You can use\ - \ patterns like '{namespace}' and/or '{stream}' to send the message to\ - \ a specific topic based on these values. Notice that the topic name will\ - \ be transformed to a standard naming convention." - type: "string" - examples: - - "sample.topic" - - "{namespace}.{stream}.sample" - test_topic: - title: "Test topic" - description: "Topic to test if Airbyte can produce messages." - type: "string" - examples: - - "test.topic" - sync_producer: - title: "Sync producer" - description: "Wait synchronously until the record has been sent to Kafka." - type: "boolean" - default: false - protocol: - title: "Protocol" - type: "object" - description: "Protocol used to communicate with brokers." - oneOf: - - title: "PLAINTEXT" - required: - - "security_protocol" - properties: - security_protocol: - type: "string" - enum: - - "PLAINTEXT" - default: "PLAINTEXT" - - title: "SASL PLAINTEXT" - required: - - "security_protocol" - - "sasl_mechanism" - - "sasl_jaas_config" - properties: - security_protocol: - type: "string" - enum: - - "SASL_PLAINTEXT" - default: "SASL_PLAINTEXT" - sasl_mechanism: - title: "SASL mechanism" - description: "SASL mechanism used for client connections. This may\ - \ be any mechanism for which a security provider is available." - type: "string" - default: "PLAIN" - enum: - - "PLAIN" - sasl_jaas_config: - title: "SASL JAAS config" - description: "JAAS login context parameters for SASL connections in\ - \ the format used by JAAS configuration files." 
- type: "string" - default: "" - airbyte_secret: true - - title: "SASL SSL" - required: - - "security_protocol" - - "sasl_mechanism" - - "sasl_jaas_config" - properties: - security_protocol: - type: "string" - enum: - - "SASL_SSL" - default: "SASL_SSL" - sasl_mechanism: - title: "SASL mechanism" - description: "SASL mechanism used for client connections. This may\ - \ be any mechanism for which a security provider is available." - type: "string" - default: "GSSAPI" - enum: - - "GSSAPI" - - "OAUTHBEARER" - - "SCRAM-SHA-256" - sasl_jaas_config: - title: "SASL JAAS config" - description: "JAAS login context parameters for SASL connections in\ - \ the format used by JAAS configuration files." - type: "string" - default: "" - airbyte_secret: true - client_id: - title: "Client ID" - description: "An id string to pass to the server when making requests. The\ - \ purpose of this is to be able to track the source of requests beyond\ - \ just ip/port by allowing a logical application name to be included in\ - \ server-side request logging." - type: "string" - examples: - - "airbyte-producer" - acks: - title: "ACKs" - description: "The number of acknowledgments the producer requires the leader\ - \ to have received before considering a request complete. This controls\ - \ the durability of records that are sent." - type: "string" - default: "1" - enum: - - "0" - - "1" - - "all" - enable_idempotence: - title: "Enable idempotence" - description: "When set to 'true', the producer will ensure that exactly\ - \ one copy of each message is written in the stream. If 'false', producer\ - \ retries due to broker failures, etc., may write duplicates of the retried\ - \ message in the stream." - type: "boolean" - default: false - compression_type: - title: "Compression type" - description: "The compression type for all data generated by the producer." - type: "string" - default: "none" - enum: - - "none" - - "gzip" - - "snappy" - - "lz4" - - "zstd" - batch_size: - title: "Batch size" - description: "The producer will attempt to batch records together into fewer\ - \ requests whenever multiple records are being sent to the same partition." - type: "integer" - examples: - - 16384 - linger_ms: - title: "Linger ms" - description: "The producer groups together any records that arrive in between\ - \ request transmissions into a single batched request." - type: "string" - examples: - - 0 - max_in_flight_requests_per_connection: - title: "Max in flight requests per connection" - description: "The maximum number of unacknowledged requests the client will\ - \ send on a single connection before blocking." - type: "integer" - examples: - - 5 - client_dns_lookup: - title: "Client DNS lookup" - description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ - \ connect to each returned IP address in sequence until a successful connection\ - \ is established. After a disconnection, the next IP is used. Once all\ - \ IPs have been used once, the client resolves the IP(s) from the hostname\ - \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ - \ bootstrap address into a list of canonical names. After the bootstrap\ - \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ - \ attempt to connect to the first IP address returned by the lookup, even\ - \ if the lookup returns multiple IP addresses." 
- type: "string" - default: "use_all_dns_ips" - enum: - - "default" - - "use_all_dns_ips" - - "resolve_canonical_bootstrap_servers_only" - - "use_all_dns_ips" - buffer_memory: - title: "Buffer memory" - description: "The total bytes of memory the producer can use to buffer records\ - \ waiting to be sent to the server." - type: "string" - examples: 33554432 - max_request_size: - title: "Max request size" - description: "The maximum size of a request in bytes." - type: "integer" - examples: - - 1048576 - retries: - title: "Retries" - description: "Setting a value greater than zero will cause the client to\ - \ resend any record whose send fails with a potentially transient error." - type: "integer" - examples: - - 2147483647 - socket_connection_setup_timeout_ms: - title: "Socket connection setup timeout" - description: "The amount of time the client will wait for the socket connection\ - \ to be established." - type: "string" - examples: - - 10000 - socket_connection_setup_timeout_max_ms: - title: "Socket connection setup max timeout" - description: "The maximum amount of time the client will wait for the socket\ - \ connection to be established. The connection setup timeout will increase\ - \ exponentially for each consecutive connection failure up to this maximum." - type: "string" - examples: - - 30000 - max_block_ms: - title: "Max block ms" - description: "The configuration controls how long the KafkaProducer's send(),\ - \ partitionsFor(), initTransactions(), sendOffsetsToTransaction(), commitTransaction()\ - \ and abortTransaction() methods will block." - type: "string" - examples: - - 60000 - request_timeout_ms: - title: "Request timeout" - description: "The configuration controls the maximum amount of time the\ - \ client will wait for the response of a request. If the response is not\ - \ received before the timeout elapses the client will resend the request\ - \ if necessary or fail the request if retries are exhausted." - type: "integer" - examples: - - 30000 - delivery_timeout_ms: - title: "Delivery timeout" - description: "An upper bound on the time to report success or failure after\ - \ a call to 'send()' returns." - type: "integer" - examples: - - 120000 - send_buffer_bytes: - title: "Send buffer bytes" - description: "The size of the TCP send buffer (SO_SNDBUF) to use when sending\ - \ data. If the value is -1, the OS default will be used." - type: "integer" - examples: - - 131072 - receive_buffer_bytes: - title: "Receive buffer bytes" - description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ - \ reading data. If the value is -1, the OS default will be used." - type: "integer" - examples: - - 32768 - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" -- dockerImage: "airbyte/destination-csv:0.2.8" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-csv" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "CSV Destination Spec" - type: "object" - required: - - "destination_path" - additionalProperties: false - properties: - destination_path: - description: "Path to the directory where csv files will be written. The\ - \ destination uses the local mount \"/local\" and any data files will\ - \ be placed inside that local mount. 
For more information check out our\ - \ docs" - type: "string" - examples: - - "/local" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-local-json:0.2.8" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-json" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Local Json Destination Spec" - type: "object" - required: - - "destination_path" - additionalProperties: false - properties: - destination_path: - description: "Path to the directory where json files will be written. The\ - \ files will be placed inside that local mount. For more information check\ - \ out our docs" - type: "string" - examples: - - "/json_data" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-mssql:0.1.10" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MS SQL Server Destination Spec" - type: "object" - required: - - "host" - - "port" - - "username" - - "database" - - "schema" - additionalProperties: true - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - order: 0 - port: - title: "Port" - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 1433 - examples: - - "1433" - order: 1 - database: - title: "DB Name" - description: "Name of the database." - type: "string" - order: 2 - schema: - title: "Default Schema" - description: "The default schema tables are written to if the source does\ - \ not specify a namespace. The usual value for this field is \"public\"\ - ." - type: "string" - examples: - - "public" - default: "public" - order: 3 - username: - title: "User" - description: "Username to use to access the database." - type: "string" - order: 4 - password: - title: "Password" - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 5 - ssl_method: - title: "SSL Method" - type: "object" - description: "Encryption method to use when communicating with the database" - order: 6 - oneOf: - - title: "Unencrypted" - additionalProperties: false - description: "Data transfer will not be encrypted." - required: - - "ssl_method" - type: "object" - properties: - ssl_method: - type: "string" - enum: - - "unencrypted" - default: "unencrypted" - - title: "Encrypted (trust server certificate)" - additionalProperties: false - description: "Use the cert provided by the server without verification.\ - \ (For testing purposes only!)" - required: - - "ssl_method" - type: "object" - properties: - ssl_method: - type: "string" - enum: - - "encrypted_trust_server_certificate" - default: "encrypted_trust_server_certificate" - - title: "Encrypted (verify certificate)" - additionalProperties: false - description: "Verify and use the cert provided by the server." - required: - - "ssl_method" - - "trustStoreName" - - "trustStorePassword" - type: "object" - properties: - ssl_method: - type: "string" - enum: - - "encrypted_verify_certificate" - default: "encrypted_verify_certificate" - hostNameInCertificate: - title: "Host Name In Certificate" - type: "string" - description: "Specifies the host name of the server. 
The value of\ - \ this property must match the subject property of the certificate." - order: 7 - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials in RSA PEM\ - \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." 
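The `tunnel_method` oneOf above (shared by this MS SQL Server spec and by the MySQL, Oracle, and Postgres destination specs later in this file) maps to a nested object in the user-supplied config. As an illustration only — the host, user, and key below are placeholders, not values from this patch — a fragment selecting the "SSH Key Authentication" branch would look roughly like:

```python
# Illustrative config fragment for the "SSH Key Authentication" branch of the
# tunnel_method oneOf described above. All values are placeholders.
tunnel_fragment = {
    "tunnel_method": {
        "tunnel_method": "SSH_KEY_AUTH",  # const that selects this branch
        "tunnel_host": "bastion.example.com",
        "tunnel_port": 22,
        "tunnel_user": "airbyte",
        "ssh_key": "-----BEGIN RSA PRIVATE KEY-----\n...\n-----END RSA PRIVATE KEY-----",
    }
}
```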
- type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/destination-meilisearch:0.2.10" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/meilisearch" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MeiliSearch Destination Spec" - type: "object" - required: - - "host" - additionalProperties: true - properties: - host: - title: "Host" - description: "Hostname of the MeiliSearch instance" - type: "string" - order: 0 - api_key: - title: "API Key" - airbyte_secret: true - description: "MeiliSearch instance API Key" - type: "string" - order: 1 - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-mongodb:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/mongodb" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MongoDB Destination Spec" - type: "object" - required: - - "database" - - "auth_type" - additionalProperties: true - properties: - instance_type: - description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ - \ Set TLS connection is used by default." - title: "MongoDb instance type" - type: "object" - order: 0 - oneOf: - - title: "Standalone MongoDb Instance" - required: - - "instance" - - "host" - - "port" - properties: - instance: - type: "string" - enum: - - "standalone" - default: "standalone" - host: - title: "Host" - type: "string" - description: "Host of a Mongo database to be replicated." - order: 0 - port: - title: "Port" - type: "integer" - description: "Port of a Mongo database to be replicated." - minimum: 0 - maximum: 65536 - default: 27017 - examples: - - "27017" - order: 1 - tls: - title: "TLS connection" - type: "boolean" - description: "Indicates whether TLS encryption protocol will be used\ - \ to connect to MongoDB. It is recommended to use TLS connection\ - \ if possible. For more information see documentation." - default: false - order: 2 - - title: "Replica Set" - required: - - "instance" - - "server_addresses" - properties: - instance: - type: "string" - enum: - - "replica" - default: "replica" - server_addresses: - title: "Server addresses" - type: "string" - description: "The members of a replica set. Please specify `host`:`port`\ - \ of each member seperated by comma." - examples: - - "host1:27017,host2:27017,host3:27017" - order: 0 - replica_set: - title: "Replica Set" - type: "string" - description: "A replica set name." - order: 1 - - title: "MongoDB Atlas" - additionalProperties: false - required: - - "instance" - - "cluster_url" - properties: - instance: - type: "string" - enum: - - "atlas" - default: "atlas" - cluster_url: - title: "Cluster URL" - type: "string" - description: "URL of a cluster to connect to." - order: 0 - database: - title: "DB Name" - description: "Name of the database." 
- type: "string" - order: 2 - auth_type: - title: "Authorization type" - type: "object" - description: "Authorization type." - oneOf: - - title: "None" - additionalProperties: false - description: "None." - required: - - "authorization" - type: "object" - properties: - authorization: - type: "string" - const: "none" - - title: "Login/Password" - additionalProperties: false - description: "Login/Password." - required: - - "authorization" - - "username" - - "password" - type: "object" - properties: - authorization: - type: "string" - const: "login/password" - username: - title: "User" - description: "Username to use to access the database." - type: "string" - order: 1 - password: - title: "Password" - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 2 - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-mysql:0.1.13" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/mysql" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MySQL Destination Spec" - type: "object" - required: - - "host" - - "port" - - "username" - - "database" - additionalProperties: true - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - order: 0 - port: - title: "Port" - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 3306 - examples: - - "3306" - order: 1 - database: - title: "DB Name" - description: "Name of the database." - type: "string" - order: 2 - username: - title: "User" - description: "Username to use to access the database." - type: "string" - order: 3 - password: - title: "Password" - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 4 - ssl: - title: "SSL Connection" - description: "Encrypt data using SSL." - type: "boolean" - default: true - order: 5 - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials for logging\ - \ into the jump server host." 
- type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-oracle:0.1.11" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/oracle" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Oracle Destination Spec" - type: "object" - required: - - "host" - - "port" - - "username" - - "sid" - additionalProperties: true - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - order: 0 - port: - title: "Port" - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 1521 - examples: - - "1521" - order: 1 - sid: - title: "SID" - description: "SID" - type: "string" - order: 2 - username: - title: "User" - description: "Username to use to access the database. This user must have\ - \ CREATE USER privileges in the database." - type: "string" - order: 3 - password: - title: "Password" - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 4 - schema: - title: "Default Schema" - description: "The default schema tables are written to if the source does\ - \ not specify a namespace. The usual value for this field is \"airbyte\"\ - . In Oracle, schemas and users are the same thing, so the \"user\" parameter\ - \ is used as the login credentials and this is used for the default Airbyte\ - \ message schema." - type: "string" - examples: - - "airbyte" - default: "airbyte" - order: 5 - encryption: - title: "Encryption" - type: "object" - description: "Encryption method to use when communicating with the database" - order: 6 - oneOf: - - title: "Unencrypted" - additionalProperties: false - description: "Data transfer will not be encrypted." - required: - - "encryption_method" - properties: - encryption_method: - type: "string" - const: "unencrypted" - enum: - - "unencrypted" - default: "unencrypted" - - title: "Native Network Ecryption (NNE)" - additionalProperties: false - description: "Native network encryption gives you the ability to encrypt\ - \ database connections, without the configuration overhead of TCP/IP\ - \ and SSL/TLS and without the need to open and listen on different ports." 
- required: - - "encryption_method" - properties: - encryption_method: - type: "string" - const: "client_nne" - enum: - - "client_nne" - default: "client_nne" - encryption_algorithm: - type: "string" - description: "This parameter defines the encryption algorithm to be\ - \ used" - title: "Encryption Algorithm" - default: "AES256" - enum: - - "AES256" - - "RC4_56" - - "3DES168" - - title: "TLS Encrypted (verify certificate)" - additionalProperties: false - description: "Verify and use the cert provided by the server." - required: - - "encryption_method" - - "ssl_certificate" - properties: - encryption_method: - type: "string" - const: "encrypted_verify_certificate" - enum: - - "encrypted_verify_certificate" - default: "encrypted_verify_certificate" - ssl_certificate: - title: "SSL PEM file" - description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ - \ containers frequently used in certificate installations" - type: "string" - airbyte_secret: true - multiline: true - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials in RSA PEM\ - \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." 
- type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsIncremental: true - supportsNormalization: false - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-postgres:0.3.11" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/postgres" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Postgres Destination Spec" - type: "object" - required: - - "host" - - "port" - - "username" - - "database" - - "schema" - additionalProperties: true - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - order: 0 - port: - title: "Port" - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 5432 - examples: - - "5432" - order: 1 - database: - title: "DB Name" - description: "Name of the database." - type: "string" - order: 2 - schema: - title: "Default Schema" - description: "The default schema tables are written to if the source does\ - \ not specify a namespace. The usual value for this field is \"public\"\ - ." - type: "string" - examples: - - "public" - default: "public" - order: 3 - username: - title: "User" - description: "Username to use to access the database." - type: "string" - order: 4 - password: - title: "Password" - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 5 - ssl: - title: "SSL Connection" - description: "Encrypt data using SSL." - type: "boolean" - default: false - order: 6 - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials for logging\ - \ into the jump server host." 
- type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/destination-redshift:0.3.19" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Redshift Destination Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - - "password" - - "schema" - additionalProperties: true - properties: - host: - description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ - \ region and end with .redshift.amazonaws.com)" - type: "string" - title: "Host" - port: - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 5439 - examples: - - "5439" - title: "Port" - username: - description: "Username to use to access the database." - type: "string" - title: "Username" - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - title: "Password" - database: - description: "Name of the database." - type: "string" - title: "Database" - schema: - description: "The default schema tables are written to if the source does\ - \ not specify a namespace. Unless specifically configured, the usual value\ - \ for this field is \"public\"." - type: "string" - examples: - - "public" - default: "public" - title: "Default Schema" - s3_bucket_name: - title: "S3 Bucket Name" - type: "string" - description: "The name of the staging S3 bucket to use if utilising a COPY\ - \ strategy. COPY is recommended for production workloads for better speed\ - \ and scalability. See AWS docs for more details." - examples: - - "airbyte.staging" - s3_bucket_region: - title: "S3 Bucket Region" - type: "string" - default: "" - description: "The region of the S3 staging bucket to use if utilising a\ - \ copy strategy." 
- enum: - - "" - - "us-east-1" - - "us-east-2" - - "us-west-1" - - "us-west-2" - - "af-south-1" - - "ap-east-1" - - "ap-south-1" - - "ap-northeast-1" - - "ap-northeast-2" - - "ap-northeast-3" - - "ap-southeast-1" - - "ap-southeast-2" - - "ca-central-1" - - "cn-north-1" - - "cn-northwest-1" - - "eu-central-1" - - "eu-north-1" - - "eu-south-1" - - "eu-west-1" - - "eu-west-2" - - "eu-west-3" - - "sa-east-1" - - "me-south-1" - access_key_id: - type: "string" - description: "The Access Key Id granting allow one to access the above S3\ - \ staging bucket. Airbyte requires Read and Write permissions to the given\ - \ bucket." - title: "S3 Key Id" - airbyte_secret: true - secret_access_key: - type: "string" - description: "The corresponding secret to the above access key id." - title: "S3 Access Key" - airbyte_secret: true - part_size: - type: "integer" - minimum: 10 - maximum: 100 - examples: - - "10" - description: "Optional. Increase this if syncing tables larger than 100GB.\ - \ Only relevant for COPY. Files are streamed to S3 in parts. This determines\ - \ the size of each part, in MBs. As S3 has a limit of 10,000 parts per\ - \ file, part size affects the table size. This is 10MB by default, resulting\ - \ in a default limit of 100GB tables. Note, a larger part size will result\ - \ in larger memory requirements. A rule of thumb is to multiply the part\ - \ size by 10 to get the memory requirement. Modify this with care." - title: "Stream Part Size" - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/destination-s3:0.1.12" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "S3 Destination Spec" - type: "object" - required: - - "s3_bucket_name" - - "s3_bucket_path" - - "s3_bucket_region" - - "access_key_id" - - "secret_access_key" - - "format" - additionalProperties: false - properties: - s3_endpoint: - title: "Endpoint" - type: "string" - default: "" - description: "This is your S3 endpoint url.(if you are working with AWS\ - \ S3, just leave empty)." - examples: - - "http://localhost:9000" - s3_bucket_name: - title: "S3 Bucket Name" - type: "string" - description: "The name of the S3 bucket." - examples: - - "airbyte_sync" - s3_bucket_path: - description: "Directory under the S3 bucket where data will be written." - type: "string" - examples: - - "data_sync/test" - s3_bucket_region: - title: "S3 Bucket Region" - type: "string" - default: "" - description: "The region of the S3 bucket." - enum: - - "" - - "us-east-1" - - "us-east-2" - - "us-west-1" - - "us-west-2" - - "af-south-1" - - "ap-east-1" - - "ap-south-1" - - "ap-northeast-1" - - "ap-northeast-2" - - "ap-northeast-3" - - "ap-southeast-1" - - "ap-southeast-2" - - "ca-central-1" - - "cn-north-1" - - "cn-northwest-1" - - "eu-central-1" - - "eu-north-1" - - "eu-south-1" - - "eu-west-1" - - "eu-west-2" - - "eu-west-3" - - "sa-east-1" - - "me-south-1" - - "us-gov-east-1" - - "us-gov-west-1" - access_key_id: - type: "string" - description: "The access key id to access the S3 bucket. Airbyte requires\ - \ Read and Write permissions to the given bucket." - title: "S3 Key Id" - airbyte_secret: true - examples: - - "A012345678910EXAMPLE" - secret_access_key: - type: "string" - description: "The corresponding secret to the access key id." 
- title: "S3 Access Key" - airbyte_secret: true - examples: - - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" - format: - title: "Output Format" - type: "object" - description: "Output data format" - oneOf: - - title: "Avro: Apache Avro" - required: - - "format_type" - - "compression_codec" - properties: - format_type: - type: "string" - enum: - - "Avro" - default: "Avro" - compression_codec: - title: "Compression Codec" - description: "The compression algorithm used to compress data. Default\ - \ to no compression." - type: "object" - oneOf: - - title: "no compression" - required: - - "codec" - properties: - codec: - type: "string" - enum: - - "no compression" - default: "no compression" - - title: "Deflate" - required: - - "codec" - - "compression_level" - properties: - codec: - type: "string" - enum: - - "Deflate" - default: "Deflate" - compression_level: - title: "Deflate level" - description: "0: no compression & fastest, 9: best compression\ - \ & slowest." - type: "integer" - default: 0 - minimum: 0 - maximum: 9 - - title: "bzip2" - required: - - "codec" - properties: - codec: - type: "string" - enum: - - "bzip2" - default: "bzip2" - - title: "xz" - required: - - "codec" - - "compression_level" - properties: - codec: - type: "string" - enum: - - "xz" - default: "xz" - compression_level: - title: "Compression level" - description: "See here for details." - type: "integer" - default: 6 - minimum: 0 - maximum: 9 - - title: "zstandard" - required: - - "codec" - - "compression_level" - properties: - codec: - type: "string" - enum: - - "zstandard" - default: "zstandard" - compression_level: - title: "Compression level" - description: "Negative levels are 'fast' modes akin to lz4 or\ - \ snappy, levels above 9 are generally for archival purposes,\ - \ and levels above 18 use a lot of memory." - type: "integer" - default: 3 - minimum: -5 - maximum: 22 - include_checksum: - title: "Include checksum" - description: "If true, include a checksum with each data block." - type: "boolean" - default: false - - title: "snappy" - required: - - "codec" - properties: - codec: - type: "string" - enum: - - "snappy" - default: "snappy" - part_size_mb: - title: "Block Size (MB) for Amazon S3 multipart upload" - description: "This is the size of a \"Part\" being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." - type: "integer" - default: 5 - examples: - - 5 - - title: "CSV: Comma-Separated Values" - required: - - "format_type" - - "flattening" - properties: - format_type: - type: "string" - enum: - - "CSV" - default: "CSV" - flattening: - type: "string" - title: "Normalization (Flattening)" - description: "Whether the input json data should be normalized (flattened)\ - \ in the output CSV. Please refer to docs for details." - default: "No flattening" - enum: - - "No flattening" - - "Root level flattening" - part_size_mb: - title: "Block Size (MB) for Amazon S3 multipart upload" - description: "This is the size of a \"Part\" being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." 
- type: "integer" - default: 5 - examples: - - 5 - - title: "JSON Lines: newline-delimited JSON" - required: - - "format_type" - properties: - format_type: - type: "string" - enum: - - "JSONL" - default: "JSONL" - part_size_mb: - title: "Block Size (MB) for Amazon S3 multipart upload" - description: "This is the size of a \"Part\" being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will allow\ - \ to upload a bigger files and improve the speed, but consumes9\ - \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." - type: "integer" - default: 5 - examples: - - 5 - - title: "Parquet: Columnar Storage" - required: - - "format_type" - properties: - format_type: - type: "string" - enum: - - "Parquet" - default: "Parquet" - compression_codec: - title: "Compression Codec" - description: "The compression algorithm used to compress data pages." - type: "string" - enum: - - "UNCOMPRESSED" - - "SNAPPY" - - "GZIP" - - "LZO" - - "BROTLI" - - "LZ4" - - "ZSTD" - default: "UNCOMPRESSED" - block_size_mb: - title: "Block Size (Row Group Size) (MB)" - description: "This is the size of a row group being buffered in memory.\ - \ It limits the memory usage when writing. Larger values will improve\ - \ the IO when reading, but consume more memory when writing. Default:\ - \ 128 MB." - type: "integer" - default: 128 - examples: - - 128 - max_padding_size_mb: - title: "Max Padding Size (MB)" - description: "Maximum size allowed as padding to align row groups.\ - \ This is also the minimum size of a row group. Default: 8 MB." - type: "integer" - default: 8 - examples: - - 8 - page_size_kb: - title: "Page Size (KB)" - description: "The page size is for compression. A block is composed\ - \ of pages. A page is the smallest unit that must be read fully\ - \ to access a single record. If this value is too small, the compression\ - \ will deteriorate. Default: 1024 KB." - type: "integer" - default: 1024 - examples: - - 1024 - dictionary_page_size_kb: - title: "Dictionary Page Size (KB)" - description: "There is one dictionary page per column per row group\ - \ when dictionary encoding is used. The dictionary page size works\ - \ like the page size but for dictionary. Default: 1024 KB." - type: "integer" - default: 1024 - examples: - - 1024 - dictionary_encoding: - title: "Dictionary Encoding" - description: "Default: true." - type: "boolean" - default: true - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" -- dockerImage: "airbyte/destination-snowflake:0.3.16" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Snowflake Destination Spec" - type: "object" - required: - - "host" - - "role" - - "warehouse" - - "database" - - "schema" - - "username" - - "password" - additionalProperties: true - properties: - host: - description: "Host domain of the snowflake instance (must include the account,\ - \ region, cloud environment, and end with snowflakecomputing.com)." - examples: - - "accountname.us-east-2.aws.snowflakecomputing.com" - type: "string" - title: "Host" - order: 0 - role: - description: "The role you created for Airbyte to access Snowflake." - examples: - - "AIRBYTE_ROLE" - type: "string" - title: "Role" - order: 1 - warehouse: - description: "The warehouse you created for Airbyte to sync data into." 
- examples: - - "AIRBYTE_WAREHOUSE" - type: "string" - title: "Warehouse" - order: 2 - database: - description: "The database you created for Airbyte to sync data into." - examples: - - "AIRBYTE_DATABASE" - type: "string" - title: "Database" - order: 3 - schema: - description: "The default Snowflake schema tables are written to if the\ - \ source does not specify a namespace." - examples: - - "AIRBYTE_SCHEMA" - type: "string" - title: "Default Schema" - order: 4 - username: - description: "The username you created to allow Airbyte to access the database." - examples: - - "AIRBYTE_USER" - type: "string" - title: "Username" - order: 5 - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - title: "Password" - order: 6 - loading_method: - type: "object" - title: "Loading Method" - description: "Loading method used to send data to Snowflake." - order: 7 - oneOf: - - title: "Standard Inserts" - additionalProperties: false - description: "Uses
INSERT
statements to send batches of records\ - \ to Snowflake. Easiest (no setup) but not recommended for large production\ - \ workloads due to slow speed." - required: - - "method" - properties: - method: - type: "string" - enum: - - "Standard" - default: "Standard" - - title: "AWS S3 Staging" - additionalProperties: false - description: "Writes large batches of records to a file, uploads the file\ - \ to S3, then uses
COPY INTO table
to upload the file. Recommended\ - \ for large production workloads for better speed and scalability." - required: - - "method" - - "s3_bucket_name" - - "access_key_id" - - "secret_access_key" - properties: - method: - type: "string" - enum: - - "S3 Staging" - default: "S3 Staging" - order: 0 - s3_bucket_name: - title: "S3 Bucket Name" - type: "string" - description: "The name of the staging S3 bucket. Airbyte will write\ - \ files to this bucket and read them via
COPY
statements\ - \ on Snowflake." - examples: - - "airbyte.staging" - order: 1 - s3_bucket_region: - title: "S3 Bucket Region" - type: "string" - default: "" - description: "The region of the S3 staging bucket to use if utilising\ - \ a copy strategy." - enum: - - "" - - "us-east-1" - - "us-east-2" - - "us-west-1" - - "us-west-2" - - "af-south-1" - - "ap-east-1" - - "ap-south-1" - - "ap-northeast-1" - - "ap-northeast-2" - - "ap-northeast-3" - - "ap-southeast-1" - - "ap-southeast-2" - - "ca-central-1" - - "cn-north-1" - - "cn-northwest-1" - - "eu-central-1" - - "eu-west-1" - - "eu-west-2" - - "eu-west-3" - - "eu-south-1" - - "eu-north-1" - - "sa-east-1" - - "me-south-1" - order: 2 - access_key_id: - type: "string" - description: "The Access Key Id granting allow one to access the above\ - \ S3 staging bucket. Airbyte requires Read and Write permissions\ - \ to the given bucket." - title: "S3 Key Id" - airbyte_secret: true - order: 3 - secret_access_key: - type: "string" - description: "The corresponding secret to the above access key id." - title: "S3 Access Key" - airbyte_secret: true - order: 4 - - title: "GCS Staging" - additionalProperties: false - description: "Writes large batches of records to a file, uploads the file\ - \ to GCS, then uses
COPY INTO table
to upload the file. Recommended\ - \ for large production workloads for better speed and scalability." - required: - - "method" - - "project_id" - - "bucket_name" - - "credentials_json" - properties: - method: - type: "string" - enum: - - "GCS Staging" - default: "GCS Staging" - order: 0 - project_id: - title: "GCP Project ID" - type: "string" - description: "The name of the GCP project ID for your credentials." - examples: - - "my-project" - order: 1 - bucket_name: - title: "GCS Bucket Name" - type: "string" - description: "The name of the staging GCS bucket. Airbyte will write\ - \ files to this bucket and read them via
COPY
statements\ - \ on Snowflake." - examples: - - "airbyte-staging" - order: 2 - credentials_json: - title: "Google Application Credentials" - type: "string" - description: "The contents of the JSON key file that has read/write\ - \ permissions to the staging GCS bucket. You will separately need\ - \ to grant bucket access to your Snowflake GCP service account.\ - \ See the GCP docs for more information on how to generate a JSON key\ - \ for your service account." - airbyte_secret: true - multiline: true - order: 3 - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml deleted file mode 100644 index 71e4a45e69158..0000000000000 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ /dev/null @@ -1,5836 +0,0 @@ -# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. -# Do NOT edit this file directly. See generator class for more details. ---- -- dockerImage: "airbyte/source-aws-cloudtrail:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/aws-cloudtrail" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Aws CloudTrail Spec" - type: "object" - required: - - "aws_key_id" - - "aws_secret_key" - - "aws_region_name" - - "start_date" - additionalProperties: true - properties: - aws_key_id: - type: "string" - description: "Specifies an AWS access key associated with an IAM user or\ - \ role." - airbyte_secret: true - aws_secret_key: - type: "string" - description: "Specifies the secret key associated with the access key. This\ - \ is essentially the 'password' for the access key." - airbyte_secret: true - aws_region_name: - type: "string" - description: "The default AWS Region to use, for example, us-west-1 or us-west-2.\ - \ When specifying a Region inline during client initialization, this property\ - \ is named region_name." - start_date: - type: "string" - description: "The date you would like to replicate data. Data in ClouTraid\ - \ is available for last 90 days only. Format: YYYY-MM-DD." - examples: - - "2021-01-01" - default: "1970-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-ads:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-ads" - connectionSpecification: - title: "Amazon Ads Spec" - type: "object" - properties: - client_id: - title: "Client Id" - description: "Oauth client id How to create your Login with Amazon" - name: "Client ID" - type: "string" - client_secret: - title: "Client Secret" - description: "Oauth client secret How to create your Login with Amazon" - name: "Client secret" - airbyte_secret: true - type: "string" - scope: - title: "Scope" - description: "By default its advertising::campaign_management, but customers\ - \ may need to set scope to cpc_advertising:campaign_management." 
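For reference, a minimal config satisfying this Amazon Ads spec only needs the three required fields listed just below (client_id, client_secret, refresh_token). The sketch that follows uses placeholder credentials and the documented defaults; none of the values come from this patch:

```python
# Illustrative Amazon Ads source config; credential values are placeholders.
# Only client_id, client_secret, and refresh_token are required by the spec.
amazon_ads_config = {
    "client_id": "placeholder-client-id",
    "client_secret": "placeholder-client-secret",
    "refresh_token": "placeholder-refresh-token",
    "region": "NA",               # default region per the spec
    "start_date": "2021-01-01",   # optional, YYYY-MM-DD
    "profiles": [1234567890],     # optional list of profile IDs
}
```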
- default: "advertising::campaign_management" - name: "Client scope" - examples: - - "cpc_advertising:campaign_management" - type: "string" - refresh_token: - title: "Refresh Token" - description: "Oauth 2.0 refresh_token, read details here" - name: "Oauth refresh token" - airbyte_secret: true - type: "string" - start_date: - title: "Start Date" - description: "Start date for collectiong reports, should not be more than\ - \ 60 days in past. In YYYY-MM-DD format" - name: "Start date" - examples: - - "2022-10-10" - - "2022-10-22" - type: "string" - region: - description: "Region to pull data from (EU/NA/FE/SANDBOX)" - default: "NA" - name: "Region" - title: "AmazonAdsRegion" - enum: - - "NA" - - "EU" - - "FE" - - "SANDBOX" - type: "string" - profiles: - title: "Profiles" - description: "profile Ids you want to fetch data for" - name: "Profile Ids" - type: "array" - items: - type: "integer" - required: - - "client_id" - - "client_secret" - - "refresh_token" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amazon-seller-partner:0.2.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" - changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" - connectionSpecification: - title: "Amazon Seller Partner Spec" - type: "object" - properties: - replication_start_date: - title: "Replication Start Date" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-25T00:00:00Z" - type: "string" - refresh_token: - title: "Refresh Token" - description: "The refresh token used obtained via authorization (can be\ - \ passed to the client instead)" - airbyte_secret: true - type: "string" - lwa_app_id: - title: "Lwa App Id" - description: "Your login with amazon app id" - airbyte_secret: true - type: "string" - lwa_client_secret: - title: "Lwa Client Secret" - description: "Your login with amazon client secret" - airbyte_secret: true - type: "string" - aws_access_key: - title: "Aws Access Key" - description: "AWS user access key" - airbyte_secret: true - type: "string" - aws_secret_key: - title: "Aws Secret Key" - description: "AWS user secret key" - airbyte_secret: true - type: "string" - role_arn: - title: "Role Arn" - description: "The role's arn (needs permission to 'Assume Role' STS)" - airbyte_secret: true - type: "string" - aws_environment: - title: "AWSEnvironment" - description: "An enumeration." - enum: - - "PRODUCTION" - - "SANDBOX" - type: "string" - region: - title: "AWSRegion" - description: "An enumeration." - enum: - - "AE" - - "DE" - - "PL" - - "EG" - - "ES" - - "FR" - - "IN" - - "IT" - - "NL" - - "SA" - - "SE" - - "TR" - - "UK" - - "AU" - - "JP" - - "SG" - - "US" - - "BR" - - "CA" - - "MX" - - "GB" - type: "string" - required: - - "replication_start_date" - - "refresh_token" - - "lwa_app_id" - - "lwa_client_secret" - - "aws_access_key" - - "aws_secret_key" - - "role_arn" - - "aws_environment" - - "region" - definitions: - AWSEnvironment: - title: "AWSEnvironment" - description: "An enumeration." - enum: - - "PRODUCTION" - - "SANDBOX" - type: "string" - AWSRegion: - title: "AWSRegion" - description: "An enumeration." 
- enum: - - "AE" - - "DE" - - "PL" - - "EG" - - "ES" - - "FR" - - "IN" - - "IT" - - "NL" - - "SA" - - "SE" - - "TR" - - "UK" - - "AU" - - "JP" - - "SG" - - "US" - - "BR" - - "CA" - - "MX" - - "GB" - type: "string" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-amplitude:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/amplitude" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Amplitude Spec" - type: "object" - required: - - "api_key" - - "secret_key" - - "start_date" - additionalProperties: false - properties: - api_key: - type: "string" - description: "This is the project’s API key, used for calling Amplitude’\ - s APIs" - airbyte_secret: true - secret_key: - type: "string" - description: "This is the project's secret key, which is also used for calling\ - \ Amplitude’s APIs" - airbyte_secret: true - start_date: - type: "string" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2021-01-25T00:00:00Z" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-apify-dataset:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/apify-dataset" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Apify Dataset Spec" - type: "object" - required: - - "datasetId" - additionalProperties: false - properties: - datasetId: - type: "string" - description: "ID of the dataset you would like to load to Airbyte." - clean: - type: "boolean" - description: "If set to true, only clean items will be downloaded from the\ - \ dataset. See description of what clean means in Apify API docs. If not sure, set clean to false." - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-appstore-singer:0.2.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/appstore" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Appstore Singer Spec" - type: "object" - required: - - "key_id" - - "private_key" - - "issuer_id" - - "vendor" - - "start_date" - additionalProperties: false - properties: - key_id: - type: "string" - description: "Key_id is the API key you use to connect to appstore's API." - private_key: - type: "string" - description: "Private_key is the contents of the key file you use to connect to appstore's API." - airbyte_secret: true - multiline: true - issuer_id: - type: "string" - description: "Issuer_id is used to generate the credentials to connect to appstore's\ - \ API." - vendor: - type: "string" - description: "This is the Apple ID of your account." - start_date: - type: "string" - description: "Date from which to start pulling data." 
- examples: - - "2020-11-16T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-asana:0.1.3" - spec: - documentationUrl: "https://docsurl.com" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Asana Spec" - type: "object" - additionalProperties: true - properties: - credentials: - title: "Authentication mechanism" - description: "Choose how to authenticate to Github" - type: "object" - oneOf: - - type: "object" - title: "Authenticate with Personal Access Token" - required: - - "personal_access_token" - properties: - option_title: - type: "string" - title: "Credentials title" - description: "PAT Credentials" - const: "PAT Credentials" - personal_access_token: - type: "string" - title: "Personal Access Token" - description: "Asana Personal Access Token (generate yours here)." - airbyte_secret: true - - type: "object" - title: "Authenticate via Asana (Oauth)" - required: - - "client_id" - - "client_secret" - - "refresh_token" - properties: - option_title: - type: "string" - title: "Credentials title" - description: "OAuth Credentials" - const: "OAuth Credentials" - client_id: - type: "string" - title: "" - description: "" - airbyte_secret: true - client_secret: - type: "string" - title: "" - description: "" - airbyte_secret: true - refresh_token: - type: "string" - title: "" - description: "" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - "1" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "refresh_token" -- dockerImage: "airbyte/source-bamboo-hr:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/bamboo-hr" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Bamboo HR Spec" - type: "object" - required: - - "subdomain" - - "api_key" - additionalProperties: false - properties: - subdomain: - type: "string" - description: "Sub Domain of bamboo hr" - api_key: - type: "string" - description: "Api key of bamboo hr" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-bigcommerce:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/bigcommerce" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "BigCommerce Source CDK Specifications" - type: "object" - required: - - "start_date" - - "store_hash" - - "access_token" - additionalProperties: false - properties: - start_date: - type: "string" - description: "The date you would like to replicate data. Format: YYYY-MM-DD." - examples: - - "2021-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - store_hash: - type: "string" - description: "The hash code of the store. For https://api.bigcommerce.com/stores/HASH_CODE/v3/,\ - \ The store's hash code is 'HASH_CODE'." - access_token: - type: "string" - description: "The API Access Token." 
- airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-bigquery:0.1.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/source/bigquery" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "BigQuery Source Spec" - type: "object" - required: - - "project_id" - - "credentials_json" - additionalProperties: false - properties: - project_id: - type: "string" - description: "The GCP project ID for the project containing the target BigQuery\ - \ dataset." - title: "Project ID" - dataset_id: - type: "string" - description: "The BigQuery Dataset ID to look for tables to replicate from." - title: "Default Dataset ID" - credentials_json: - type: "string" - description: "The contents of the JSON service account key. Check out the\ - \ docs\ - \ if you need help generating this key." - title: "Credentials JSON" - airbyte_secret: true - supportsIncremental: true - supportsNormalization: true - supportsDBT: true - supported_destination_sync_modes: [] - supported_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/source-bing-ads:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/bing-ads" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Bing Ads Spec" - type: "object" - required: - - "accounts" - - "client_id" - - "client_secret" - - "customer_id" - - "developer_token" - - "refresh_token" - - "user_id" - - "reports_start_date" - - "hourly_reports" - - "daily_reports" - - "weekly_reports" - - "monthly_reports" - additionalProperties: false - properties: - accounts: - title: "Accounts" - type: "object" - description: "Account selection strategy." - oneOf: - - title: "All accounts assigned to your user" - additionalProperties: false - description: "Fetch data for all available accounts." - required: - - "selection_strategy" - properties: - selection_strategy: - type: "string" - enum: - - "all" - const: "all" - - title: "Subset of your accounts" - additionalProperties: false - description: "Fetch data for subset of account ids." - required: - - "ids" - - "selection_strategy" - properties: - selection_strategy: - type: "string" - enum: - - "subset" - const: "subset" - ids: - type: "array" - description: "List of accounts from which data will be fetched." - items: - type: "string" - minItems: 1 - uniqueItems: true - client_id: - type: "string" - description: "ID of your Microsoft Advertising client application." - airbyte_secret: true - client_secret: - type: "string" - description: "Secret of your Microsoft Advertising client application." - airbyte_secret: true - customer_id: - type: "string" - description: "User's customer ID." - developer_token: - type: "string" - description: "Developer token associated with user." - airbyte_secret: true - refresh_token: - type: "string" - description: "The long-lived Refresh token received via grant_type=refresh_token\ - \ request." - airbyte_secret: true - user_id: - type: "string" - description: "Unique user identifier." - reports_start_date: - type: "string" - format: "date" - default: "2020-01-01" - description: "From which date perform initial sync for report related streams.\ - \ In YYYY-MM-DD format" - hourly_reports: - title: "Hourly reports" - type: "boolean" - description: "The report data will be aggregated by each hour of the day." 
- default: false - daily_reports: - title: "Daily reports" - type: "boolean" - description: "The report data will be aggregated by each day." - default: false - weekly_reports: - title: "Weekly reports" - type: "boolean" - description: "The report data will be aggregated by each week running from\ - \ Sunday through Saturday." - default: false - monthly_reports: - title: "Monthly reports" - type: "boolean" - description: "The report data will be aggregated by each month." - default: false - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-braintree:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/braintree" - connectionSpecification: - title: "Braintree Spec" - type: "object" - properties: - merchant_id: - title: "Merchant Id" - description: "Merchant ID is the unique identifier for entire gateway account." - name: "Merchant ID" - type: "string" - public_key: - title: "Public Key" - description: "This is your user-specific public identifier for Braintree." - name: "Public key" - type: "string" - private_key: - title: "Private Key" - description: "This is your user-specific private identifier." - name: "Private Key" - airbyte_secret: true - type: "string" - start_date: - title: "Start Date" - description: "The date from which you'd like to replicate data for Braintree\ - \ API for UTC timezone, All data generated after this date will be replicated." - name: "Start date" - examples: - - "2020" - - "2020-12-30" - - "2020-11-22 20:20:05" - type: "string" - format: "date-time" - environment: - description: "Environment specifies where the data will come from." - name: "Environment" - examples: - - "sandbox" - - "production" - - "qa" - - "development" - allOf: - - $ref: "#/definitions/Environment" - required: - - "merchant_id" - - "public_key" - - "private_key" - - "environment" - definitions: - Environment: - title: "Environment" - description: "An enumeration." - enum: - - "Development" - - "Sandbox" - - "Qa" - - "Production" - type: "string" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-cart:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/cart" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Cart Spec" - type: "object" - required: - - "access_token" - - "start_date" - - "store_name" - additionalProperties: true - properties: - access_token: - type: "string" - airbyte_secret: true - description: "API Key. See the docs for information on how to generate this key." - store_name: - type: "string" - description: "Store name. All API URLs start with https://[mystorename.com]/api/v1/,\ - \ where [mystorename.com] is the domain name of your store." 
- start_date: - title: "Start Date" - type: "string" - description: "The date from which you'd like to replicate the data" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2021-01-01T00:00:00Z" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-chargebee:0.1.4" - spec: - documentationUrl: "https://apidocs.chargebee.com/docs/api" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Chargebee Spec" - type: "object" - required: - - "site" - - "site_api_key" - - "start_date" - - "product_catalog" - additionalProperties: false - properties: - site: - type: "string" - title: "Site" - description: "The site prefix for your Chargebee instance." - examples: - - "airbyte-test" - site_api_key: - type: "string" - title: "API Key" - description: "The API key from your Chargebee instance." - examples: - - "test_3yzfanAXF66USdWC9wQcM555DQJkSYoppu" - airbyte_secret: true - start_date: - type: "string" - title: "Start Date" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2021-01-25T00:00:00Z" - product_catalog: - title: "Product Catalog" - type: "string" - description: "Product Catalog version of your Chargebee site. Instructions\ - \ on how to find your version you may find here under `API Version` section." - enum: - - "1.0" - - "2.0" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-clickhouse:0.1.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "ClickHouse Source Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - additionalProperties: false - properties: - host: - description: "Host Endpoint of the Clickhouse Cluster" - type: "string" - port: - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 8123 - examples: - - "8123" - database: - description: "Name of the database." - type: "string" - examples: - - "default" - username: - description: "Username to use to access the database." - type: "string" - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - ssl: - title: "SSL Connection" - description: "Encrypt data using SSL." - type: "boolean" - default: true - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." 
- type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials in RSA PEM\ - \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-close-com:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/close-com" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Close.com Spec" - type: "object" - required: - - "api_key" - additionalProperties: false - properties: - api_key: - type: "string" - description: "Close.com API key (usually starts with 'api_'; find yours\ - \ here)." - airbyte_secret: true - start_date: - type: "string" - description: "The start date to sync data. Leave blank for full sync. Format:\ - \ YYYY-MM-DD." - examples: - - "2021-01-01" - default: "2021-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-cockroachdb:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Cockroach Source Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - additionalProperties: false - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - order: 0 - port: - title: "Port" - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 5432 - examples: - - "5432" - order: 1 - database: - title: "DB Name" - description: "Name of the database." - type: "string" - order: 2 - username: - title: "User" - description: "Username to use to access the database." - type: "string" - order: 3 - password: - title: "Password" - description: "Password associated with the username." 
- type: "string" - airbyte_secret: true - order: 4 - ssl: - title: "Connect using SSL" - description: "Encrypt client/server communications for increased security." - type: "boolean" - default: false - order: 5 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-dixa:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/dixa" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Dixa Spec" - type: "object" - required: - - "api_token" - - "start_date" - additionalProperties: false - properties: - api_token: - type: "string" - description: "Dixa API token" - airbyte_secret: true - start_date: - type: "string" - description: "The connector pulls records updated from this date onwards." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - examples: - - "YYYY-MM-DD" - batch_size: - type: "integer" - description: "Number of days to batch into one request. Max 31." - pattern: "^[0-9]{1,2}$" - examples: - - 1 - - 31 - default: 31 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-drift:0.2.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/drift" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Drift Spec" - type: "object" - required: - - "access_token" - additionalProperties: false - properties: - access_token: - type: "string" - description: "Drift Access Token. See the docs for more information on how to generate this key." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-exchange-rates:0.2.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/exchangeratesapi" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "ratesapi.io Source Spec" - type: "object" - required: - - "start_date" - - "access_key" - additionalProperties: false - properties: - start_date: - type: "string" - description: "Start getting data from that date." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - examples: - - "YYYY-MM-DD" - access_key: - type: "string" - description: "Your API Access Key. See here. The key is case sensitive." - airbyte_secret: true - base: - type: "string" - description: "ISO reference currency. See here. Free plan doesn't support Source Currency Switching, default\ - \ base currency is EUR" - examples: - - "EUR" - - "USD" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-facebook-marketing:0.2.21" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" - changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" - connectionSpecification: - title: "Source Facebook Marketing" - type: "object" - properties: - account_id: - title: "Account Id" - description: "The Facebook Ad account ID to use when pulling data from the\ - \ Facebook Marketing API." - type: "string" - access_token: - title: "Access Token" - description: "The value of the access token generated. See the docs\ - \ for more information" - airbyte_secret: true - type: "string" - start_date: - title: "Start Date" - description: "The date from which you'd like to replicate data for AdCreatives\ - \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. 
All data generated\ - \ after this date will be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-25T00:00:00Z" - type: "string" - format: "date-time" - end_date: - title: "End Date" - description: "The date until which you'd like to replicate data for AdCreatives\ - \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated\ - \ between start_date and this date will be replicated. Not setting this\ - \ option will result in always syncing the latest data." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-26T00:00:00Z" - type: "string" - format: "date-time" - include_deleted: - title: "Include Deleted" - description: "Include data from deleted campaigns, ads, and adsets." - default: false - type: "boolean" - insights_lookback_window: - title: "Insights Lookback Window" - description: "The attribution window for the actions" - default: 28 - minimum: 0 - maximum: 28 - type: "integer" - insights_days_per_job: - title: "Insights Days Per Job" - description: "Number of days to sync in one job. The more data you have\ - \ - the smaller you want this parameter to be." - default: 7 - minimum: 1 - maximum: 30 - type: "integer" - custom_insights: - title: "Custom Insights" - description: "A list wich contains insights entries, each entry must have\ - \ a name and can contains fields, breakdowns or action_breakdowns)" - type: "array" - items: - title: "InsightConfig" - type: "object" - properties: - name: - title: "Name" - description: "The name value of insight" - type: "string" - fields: - title: "Fields" - description: "A list of chosen fields for fields parameter" - default: [] - type: "array" - items: - type: "string" - breakdowns: - title: "Breakdowns" - description: "A list of chosen breakdowns for breakdowns" - default: [] - type: "array" - items: - type: "string" - action_breakdowns: - title: "Action Breakdowns" - description: "A list of chosen action_breakdowns for action_breakdowns" - default: [] - type: "array" - items: - type: "string" - required: - - "name" - required: - - "account_id" - - "access_token" - - "start_date" - definitions: - InsightConfig: - title: "InsightConfig" - type: "object" - properties: - name: - title: "Name" - description: "The name value of insight" - type: "string" - fields: - title: "Fields" - description: "A list of chosen fields for fields parameter" - default: [] - type: "array" - items: - type: "string" - breakdowns: - title: "Breakdowns" - description: "A list of chosen breakdowns for breakdowns" - default: [] - type: "array" - items: - type: "string" - action_breakdowns: - title: "Action Breakdowns" - description: "A list of chosen action_breakdowns for action_breakdowns" - default: [] - type: "array" - items: - type: "string" - required: - - "name" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: [] - oauthFlowOutputParameters: - - - "access_token" -- dockerImage: "airbyte/source-facebook-pages:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-pages" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Facebook Pages Spec" - type: "object" - required: - - "access_token" - - "page_id" - additionalProperties: false - properties: - access_token: - type: "string" - 
description: "Facebook Page Access Token" - airbyte_secret: true - page_id: - type: "string" - description: "Page ID" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: [] - oauthFlowOutputParameters: - - - "access_token" -- dockerImage: "airbyte/source-file:0.2.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/file" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "File Source Spec" - type: "object" - additionalProperties: false - required: - - "dataset_name" - - "format" - - "url" - - "provider" - properties: - dataset_name: - type: "string" - description: "Name of the final table where to replicate this file (should\ - \ include only letters, numbers dash and underscores)" - format: - type: "string" - enum: - - "csv" - - "json" - - "jsonl" - - "excel" - - "feather" - - "parquet" - default: "csv" - description: "File Format of the file to be replicated (Warning: some format\ - \ may be experimental, please refer to docs)." - reader_options: - type: "string" - description: "This should be a valid JSON string used by each reader/parser\ - \ to provide additional options and tune its behavior" - examples: - - "{}" - - "{'sep': ' '}" - url: - type: "string" - description: "URL path to access the file to be replicated" - provider: - type: "object" - description: "Storage Provider or Location of the file(s) to be replicated." - default: "Public Web" - oneOf: - - title: "HTTPS: Public Web" - required: - - "storage" - properties: - storage: - type: "string" - enum: - - "HTTPS" - default: "HTTPS" - - title: "GCS: Google Cloud Storage" - required: - - "storage" - properties: - storage: - type: "string" - enum: - - "GCS" - default: "GCS" - service_account_json: - type: "string" - description: "In order to access private Buckets stored on Google\ - \ Cloud, this connector would need a service account json credentials\ - \ with the proper permissions as described here. Please generate the credentials.json\ - \ file and copy/paste its content to this field (expecting JSON\ - \ formats). If accessing publicly available data, this field is\ - \ not necessary." - - title: "S3: Amazon Web Services" - required: - - "storage" - properties: - storage: - type: "string" - enum: - - "S3" - default: "S3" - aws_access_key_id: - type: "string" - description: "In order to access private Buckets stored on AWS S3,\ - \ this connector would need credentials with the proper permissions.\ - \ If accessing publicly available data, this field is not necessary." - aws_secret_access_key: - type: "string" - description: "In order to access private Buckets stored on AWS S3,\ - \ this connector would need credentials with the proper permissions.\ - \ If accessing publicly available data, this field is not necessary." - airbyte_secret: true - - title: "AzBlob: Azure Blob Storage" - required: - - "storage" - - "storage_account" - properties: - storage: - type: "string" - enum: - - "AzBlob" - default: "AzBlob" - storage_account: - type: "string" - description: "The globally unique name of the storage account that\ - \ the desired blob sits within. See here for more details." - sas_token: - type: "string" - description: "To access Azure Blob Storage, this connector would need\ - \ credentials with the proper permissions. One option is a SAS (Shared\ - \ Access Signature) token. 
If accessing publicly available data,\ - \ this field is not necessary." - airbyte_secret: true - shared_key: - type: "string" - description: "To access Azure Blob Storage, this connector would need\ - \ credentials with the proper permissions. One option is a storage\ - \ account shared key (aka account key or access key). If accessing\ - \ publicly available data, this field is not necessary." - airbyte_secret: true - - title: "SSH: Secure Shell" - required: - - "storage" - - "user" - - "host" - properties: - storage: - type: "string" - enum: - - "SSH" - default: "SSH" - user: - type: "string" - password: - type: "string" - airbyte_secret: true - host: - type: "string" - port: - type: "string" - default: "22" - - title: "SCP: Secure copy protocol" - required: - - "storage" - - "user" - - "host" - properties: - storage: - type: "string" - enum: - - "SCP" - default: "SCP" - user: - type: "string" - password: - type: "string" - airbyte_secret: true - host: - type: "string" - port: - type: "string" - default: "22" - - title: "SFTP: Secure File Transfer Protocol" - required: - - "storage" - - "user" - - "host" - properties: - storage: - type: "string" - enum: - - "SFTP" - default: "SFTP" - user: - type: "string" - password: - type: "string" - airbyte_secret: true - host: - type: "string" - port: - type: "string" - default: "22" - - title: "Local Filesystem (limited)" - required: - - "storage" - properties: - storage: - type: "string" - description: "WARNING: Note that local storage URL available for read\ - \ must start with the local mount \"/local/\" at the moment until\ - \ we implement more advanced docker mounting options..." - enum: - - "local" - default: "local" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-freshdesk:0.2.7" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/freshdesk" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Freshdesk Spec" - type: "object" - required: - - "domain" - - "api_key" - additionalProperties: false - properties: - domain: - type: "string" - description: "Freshdesk domain" - examples: - - "myaccount.freshdesk.com" - pattern: - - "^[a-zA-Z0-9._-]*\\.freshdesk\\.com$" - api_key: - type: "string" - description: "Freshdesk API Key. See the docs for more information on how to obtain this key." - airbyte_secret: true - requests_per_minute: - title: "Requests per minute" - type: "integer" - description: "Number of requests per minute that this source allowed to\ - \ use." - start_date: - title: "Start date" - description: "Date from which to start pulling data." - format: "date-time" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2020-12-01T00:00:00Z" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-freshservice:0.1.0" - spec: - documentationUrl: "https://hub.docker.com/r/airbyte/source-freshservice" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Freshservice Spec" - type: "object" - required: - - "domain_name" - - "api_key" - - "start_date" - additionalProperties: false - properties: - domain_name: - type: "string" - description: "Freshservice domain" - examples: - - "mydomain.freshservice.com" - api_key: - title: "Api Key" - type: "string" - description: "Your API Access Key. See here. The key is case sensitive." 
- airbyte_secret: true - start_date: - title: "Replication Start Date" - type: "string" - description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2020-10-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/github" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Github Source Spec" - type: "object" - required: - - "start_date" - - "repository" - additionalProperties: true - properties: - credentials: - title: "Authentication mechanism" - description: "Choose how to authenticate to Github" - type: "object" - oneOf: - - type: "object" - title: "Authenticate via Github (Oauth)" - required: - - "access_token" - properties: - option_title: - type: "string" - title: "Credentials title" - description: "OAuth Credentials" - const: "OAuth Credentials" - access_token: - type: "string" - title: "Access Token" - description: "Oauth access token" - airbyte_secret: true - - type: "object" - title: "Authenticate with Personal Access Token" - required: - - "personal_access_token" - properties: - option_title: - type: "string" - title: "Credentials title" - description: "PAT Credentials" - const: "PAT Credentials" - personal_access_token: - type: "string" - title: "Personal Access Tokens" - description: "Log into Github and then generate a personal access token. To load balance your API quota consumption\ - \ across multiple API tokens, input multiple tokens separated with\ - \ \",\"" - airbyte_secret: true - repository: - type: "string" - examples: - - "airbytehq/airbyte" - - "airbytehq/*" - title: "Github repositories" - description: "Space-delimited list of GitHub repositories/organizations,\ - \ e.g. `airbytehq/airbyte` for single repository and `airbytehq/*` for\ - \ get all repositories from organization" - start_date: - type: "string" - title: "Start date" - description: "The date from which you'd like to replicate data for GitHub\ - \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ - \ will be replicated. Note that it will be used only in the following\ - \ incremental streams: comments, commits and issues." - examples: - - "2021-03-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - branch: - type: "string" - title: "Branch" - examples: - - "airbytehq/airbyte/master" - description: "Space-delimited list of GitHub repository branches to pull\ - \ commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified\ - \ for a repository, the default branch will be pulled." 
- supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - "0" - oauthFlowInitParameters: [] - oauthFlowOutputParameters: - - - "access_token" -- dockerImage: "airbyte/source-gitlab:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/gitlab" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Gitlab Singer Spec" - type: "object" - required: - - "api_url" - - "private_token" - - "start_date" - additionalProperties: false - properties: - api_url: - type: "string" - examples: - - "gitlab.com" - description: "Please enter your basic URL from Gitlab instance" - private_token: - type: "string" - description: "Log into your Gitlab account and then generate a personal\ - \ Access Token." - airbyte_secret: true - groups: - type: "string" - examples: - - "airbyte.io" - description: "Space-delimited list of groups. e.g. airbyte.io" - projects: - type: "string" - examples: - - "airbyte.io/documentation" - description: "Space-delimited list of projects. e.g. airbyte.io/documentation\ - \ meltano/tap-gitlab" - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Gitlab\ - \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ - \ date will be replicated." - examples: - - "2021-03-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-google-ads:0.1.15" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-ads" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Google Ads Spec" - type: "object" - required: - - "credentials" - - "start_date" - - "customer_id" - additionalProperties: true - properties: - credentials: - type: "object" - title: "Google Credentials" - required: - - "developer_token" - - "client_id" - - "client_secret" - - "refresh_token" - properties: - developer_token: - type: "string" - title: "Developer Token" - description: "Developer token granted by Google to use their APIs. More\ - \ instruction on how to find this value in our docs" - airbyte_secret: true - client_id: - type: "string" - title: "Client Id" - description: "Google client id. More instruction on how to find this\ - \ value in our docs" - client_secret: - type: "string" - title: "Client Secret" - description: "Google client secret. More instruction on how to find\ - \ this value in our docs" - airbyte_secret: true - access_token: - type: "string" - title: "Access Token" - description: "Access token generated using developer_token, oauth_client_id,\ - \ and oauth_client_secret. More instruction on how to find this value\ - \ in our docs" - airbyte_secret: true - refresh_token: - type: "string" - title: "Refresh Token" - description: "Refresh token generated using developer_token, oauth_client_id,\ - \ and oauth_client_secret. More instruction on how to find this value\ - \ in our docs" - airbyte_secret: true - customer_id: - title: "Customer Id" - type: "string" - description: "Customer id must be specified as a 10-digit number without\ - \ dashes. 
More instruction on how to find this value in our docs" - login_customer_id: - type: "string" - title: "Login Customer ID" - description: "If your access to the customer account is through a manager\ - \ account, this field is required and must be set to the customer ID of\ - \ the manager account (10-digit number without dashes). More information\ - \ about this field you can see here" - start_date: - type: "string" - title: "Start Date" - description: "UTC date and time in the format 2017-01-25. Any data before\ - \ this date will not be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - examples: - - "2017-01-25" - conversion_window_days: - title: "Conversion Window" - type: "integer" - description: "Define the historical replication lookback window in days" - minimum: 0 - maximum: 1095 - default: 14 - examples: - - 14 - custom_queries: - type: "array" - title: "Custom GAQL Queries" - items: - type: "object" - properties: - query: - type: "string" - title: "Custom query" - description: "A custom defined GAQL query for building the report.\ - \ Should not contain segments.date expression as it used by incremental\ - \ streams" - examples: - - "SELECT segments.ad_destination_type, campaign.advertising_channel_sub_type\ - \ FROM campaign WHERE campaign.status = 'PAUSED'" - table_name: - type: "string" - title: "Destination table name" - description: "The table name in your destination database for choosen\ - \ query." - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - - - "developer_token" - oauthFlowOutputParameters: - - - "access_token" - - - "refresh_token" -- dockerImage: "airbyte/source-google-analytics-v4:0.1.9" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Google Analytics V4 Spec" - type: "object" - required: - - "view_id" - - "start_date" - additionalProperties: true - properties: - view_id: - type: "string" - title: "View ID" - description: "The ID for the Google Analytics View you want to fetch data\ - \ from. This can be found from the Google Analytics Account Explorer." - airbyte_secret: true - start_date: - type: "string" - title: "Start Date" - description: "A date in the format YYYY-MM-DD." - examples: - - "2020-06-01" - window_in_days: - type: "integer" - description: "The amount of days for each data-chunk begining from start_date.\ - \ Bigger the value - faster the fetch. (Min=1, as for a Day; Max=364,\ - \ as for a Year)." - examples: - - 30 - - 60 - - 90 - - 120 - - 200 - - 364 - default: 90 - custom_reports: - title: "Custom Reports" - type: "string" - description: "A JSON array describing the custom reports you want to sync\ - \ from GA. Check out the docs to get more information about this field." 
- credentials: - type: "object" - oneOf: - - title: "Authenticate via Google (Oauth)" - type: "object" - required: - - "client_id" - - "client_secret" - - "refresh_token" - properties: - auth_type: - type: "string" - const: "Client" - enum: - - "Client" - default: "Client" - order: 0 - client_id: - title: "Client ID" - type: "string" - description: "The Client ID of your developer application" - airbyte_secret: true - client_secret: - title: "Client Secret" - type: "string" - description: "The client secret of your developer application" - airbyte_secret: true - refresh_token: - title: "Refresh Token" - type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" - airbyte_secret: true - access_token: - title: "Access Token" - type: "string" - description: "A access token generated using the above client ID,\ - \ secret and refresh_token" - airbyte_secret: true - - type: "object" - title: "Service Account Key Authentication" - required: - - "credentials_json" - properties: - auth_type: - type: "string" - const: "Service" - enum: - - "Service" - default: "Service" - order: 0 - credentials_json: - type: "string" - description: "The JSON key of the service account to use for authorization" - examples: - - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ - \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "access_token" - - - "refresh_token" -- dockerImage: "airbyte/source-google-directory:0.1.5" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-directory" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Google Directory Spec" - type: "object" - required: - - "credentials_json" - - "email" - additionalProperties: false - properties: - credentials_json: - type: "string" - description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." - airbyte_secret: true - email: - type: "string" - description: "The email of the user, which has permissions to access the\ - \ Google Workspace Admin APIs." - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-google-search-console:0.1.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Google Search Console Spec" - type: "object" - additionalProperties: false - required: - - "site_urls" - - "start_date" - - "authorization" - properties: - site_urls: - type: "array" - items: - type: "string" - description: "Website URLs property; do not include the domain-level property\ - \ in the list" - examples: - - "https://example1.com" - - "https://example2.com" - start_date: - type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD." - examples: - - "2021-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - end_date: - type: "string" - description: "The date from which you'd like to replicate data in the format\ - \ YYYY-MM-DD. 
Must be greater or equal start_date field" - examples: - - "2021-12-12" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - authorization: - type: "object" - title: "Authentication Type" - oneOf: - - title: "Authenticate via Google (Oauth)" - type: "object" - required: - - "auth_type" - - "client_id" - - "client_secret" - - "refresh_token" - properties: - auth_type: - type: "string" - const: "Client" - enum: - - "Client" - default: "Client" - order: 0 - client_id: - title: "Client ID" - type: "string" - description: "The Client ID of your developer application" - airbyte_secret: true - client_secret: - title: "Client Secret" - type: "string" - description: "The client secret of your developer application" - airbyte_secret: true - access_token: - title: "Access Token" - type: "string" - description: "An access token generated using the above client ID\ - \ and secret" - airbyte_secret: true - refresh_token: - title: "Refresh Token" - type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" - airbyte_secret: true - - type: "object" - title: "Service Account Key Authentication" - required: - - "auth_type" - - "service_account_info" - - "email" - properties: - auth_type: - type: "string" - const: "Service" - enum: - - "Service" - default: "Service" - order: 0 - service_account_info: - title: "Service Account JSON Key" - type: "string" - description: "The JSON key of the service account to use for authorization" - examples: - - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ - \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" - email: - title: "Admin Email" - type: "string" - description: "The email of the user which has permissions to access\ - \ the Google Workspace Admin APIs." - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "authorization" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "access_token" - - - "refresh_token" -- dockerImage: "airbyte/source-google-sheets:0.2.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-sheets" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Stripe Source Spec" - type: "object" - required: - - "spreadsheet_id" - additionalProperties: true - properties: - spreadsheet_id: - type: "string" - description: "The ID of the spreadsheet to be replicated." 
- credentials: - type: "object" - oneOf: - - title: "Authenticate via Google (Oauth)" - type: "object" - required: - - "auth_type" - - "client_id" - - "client_secret" - - "refresh_token" - properties: - auth_type: - type: "string" - const: "Client" - client_id: - title: "Client ID" - type: "string" - description: "The Client ID of your developer application" - airbyte_secret: true - client_secret: - title: "Client Secret" - type: "string" - description: "The client secret of your developer application" - airbyte_secret: true - refresh_token: - title: "Refresh Token" - type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" - airbyte_secret: true - - title: "Service Account Key Authentication" - type: "object" - required: - - "auth_type" - - "service_account_info" - properties: - auth_type: - type: "string" - const: "Service" - service_account_info: - type: "string" - description: "The JSON key of the service account to use for authorization" - examples: - - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ - \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - 0 - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "refresh_token" -- dockerImage: "airbyte/source-google-workspace-admin-reports:0.1.5" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/google-workspace-admin-reports" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Google Directory Spec" - type: "object" - required: - - "credentials_json" - - "email" - additionalProperties: false - properties: - credentials_json: - type: "string" - description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." - airbyte_secret: true - email: - type: "string" - description: "The email of the user, which has permissions to access the\ - \ Google Workspace Admin APIs." - lookback: - type: "integer" - minimum: 0 - maximum: 180 - description: "Sets the range of time shown in the report. Reports API allows\ - \ from up to 180 days ago. " - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-greenhouse:0.2.5" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/greenhouse" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Greenhouse Spec" - type: "object" - required: - - "api_key" - additionalProperties: false - properties: - api_key: - type: "string" - description: "Greenhouse API Key. See the docs for more information on how to generate this key." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-harvest:0.1.5" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/harvest" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Harvest Spec" - type: "object" - required: - - "api_token" - - "account_id" - - "replication_start_date" - additionalProperties: false - properties: - api_token: - title: "API Token" - description: "Harvest API Token." - airbyte_secret: true - type: "string" - account_id: - title: "Account ID" - description: "Harvest account ID. 
Required for all Harvest requests in pair\ - \ with API Key" - airbyte_secret: true - type: "string" - replication_start_date: - title: "Replication Start Date" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-25T00:00:00Z" - type: "string" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" -- dockerImage: "airbyte/source-hubspot:0.1.21" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Hubspot Source Spec" - type: "object" - required: - - "start_date" - - "credentials" - additionalProperties: false - properties: - start_date: - type: "string" - title: "Replication start date" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2017-01-25T00:00:00Z" - credentials: - title: "Authentication mechanism" - description: "Choose either to provide the API key or the OAuth2.0 credentials" - type: "object" - oneOf: - - type: "object" - title: "Authenticate via Hubspot (Oauth)" - required: - - "redirect_uri" - - "client_id" - - "client_secret" - - "refresh_token" - - "access_token" - - "credentials_title" - properties: - credentials_title: - type: "string" - title: "Credentials title" - description: "Name of the credentials set" - const: "OAuth Credentials" - enum: - - "OAuth Credentials" - default: "OAuth Credentials" - order: 0 - client_id: - title: "Client ID" - description: "Hubspot client_id. See our docs if you need help finding this id." - type: "string" - examples: - - "123456789000" - client_secret: - title: "Client Secret" - description: "Hubspot client_secret. See our docs if you need help finding this secret." - type: "string" - examples: - - "secret" - airbyte_secret: true - refresh_token: - title: "Refresh token" - description: "Hubspot refresh_token. See our docs if you need help generating the token." - type: "string" - examples: - - "refresh_token" - airbyte_secret: true - - type: "object" - title: "API key" - required: - - "api_key" - - "credentials_title" - properties: - credentials_title: - type: "string" - title: "Credentials title" - description: "Name of the credentials set" - const: "API Key Credentials" - enum: - - "API Key Credentials" - default: "API Key Credentials" - order: 0 - api_key: - title: "API key" - description: "Hubspot API Key. See our docs if you need help finding this key." 
- type: "string" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - - - "refresh_token" - oauthFlowOutputParameters: - - - "refresh_token" -- dockerImage: "airbyte/source-db2:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/db2" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "IBM Db2 Source Spec" - type: "object" - required: - - "host" - - "port" - - "db" - - "username" - - "password" - additionalProperties: false - properties: - host: - description: "Host of the Db2." - type: "string" - port: - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 8123 - examples: - - "8123" - db: - description: "Name of the database." - type: "string" - examples: - - "default" - username: - description: "Username to use to access the database." - type: "string" - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-instagram:0.1.9" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/instagram" - changelogUrl: "https://docs.airbyte.io/integrations/sources/instagram" - connectionSpecification: - title: "Source Instagram" - type: "object" - properties: - start_date: - title: "Start Date" - description: "The date from which you'd like to replicate data for User\ - \ Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after\ - \ this date will be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-25T00:00:00Z" - type: "string" - format: "date-time" - access_token: - title: "Access Token" - description: "The value of the access token generated. See the docs for\ - \ more information" - airbyte_secret: true - type: "string" - required: - - "start_date" - - "access_token" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: [] - oauthFlowOutputParameters: - - - "access_token" -- dockerImage: "airbyte/source-intercom:0.1.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/intercom" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Intercom Spec" - type: "object" - required: - - "access_token" - - "start_date" - additionalProperties: false - properties: - access_token: - type: "string" - description: "Intercom Access Token. See the docs for more information on how to obtain this key." - airbyte_secret: true - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Intercom\ - \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ - \ date will be replicated." 
- examples: - - "2020-11-16T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-iterable:0.1.9" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/iterable" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Iterable Spec" - type: "object" - required: - - "start_date" - - "api_key" - additionalProperties: false - properties: - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Iterable,\ - \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ - \ will be replicated." - examples: - - "2021-04-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - api_key: - type: "string" - description: "Iterable API Key. See the docs for more information on how to obtain this key." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-jira:0.2.14" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/jira" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Jira Spec" - type: "object" - required: - - "api_token" - - "domain" - - "email" - additionalProperties: true - properties: - api_token: - type: "string" - description: "Jira API Token. See the docs for more information on how to generate this key." - airbyte_secret: true - domain: - type: "string" - examples: - - "domainname.atlassian.net" - pattern: "^[a-zA-Z0-9._-]*\\.atlassian\\.net$" - description: "Domain for your Jira account, e.g. airbyteio.atlassian.net" - email: - type: "string" - description: "The user email for your Jira account" - projects: - type: "array" - title: "Projects" - items: - type: "string" - examples: - - "PROJ1" - - "PROJ2" - description: "Comma-separated list of Jira project keys to replicate data\ - \ for" - start_date: - type: "string" - title: "Start Date" - description: "The date from which you'd like to replicate data for Jira\ - \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ - \ will be replicated. Note that it will be used only in the following\ - \ incremental streams: issues." - examples: - - "2021-03-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - additional_fields: - type: "array" - title: "Additional Fields" - items: - type: "string" - description: "Comma-separated list of additional fields to include in replicating\ - \ issues" - examples: - - "Field A" - - "Field B" - expand_issue_changelog: - type: "boolean" - title: "Expand Issue Changelog" - description: "Expand the changelog when replicating issues" - default: false - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-kafka:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/kafka" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Kafka Source Spec" - type: "object" - required: - - "bootstrap_servers" - - "subscription" - - "protocol" - additionalProperties: false - properties: - bootstrap_servers: - title: "Bootstrap servers" - description: "A list of host/port pairs to use for establishing the initial\ - \ connection to the Kafka cluster. 
The client will make use of all servers\ - \ irrespective of which servers are specified here for bootstrapping—this\ - \ list only impacts the initial hosts used to discover the full set of\ - \ servers. This list should be in the form host1:port1,host2:port2,....\ - \ Since these servers are just used for the initial connection to discover\ - \ the full cluster membership (which may change dynamically), this list\ - \ need not contain the full set of servers (you may want more than one,\ - \ though, in case a server is down)." - type: "string" - examples: - - "kafka-broker1:9092,kafka-broker2:9092" - subscription: - title: "Subscribe method" - type: "object" - description: "You can choose to manually assign a list of partitions, or\ - \ subscribe to all topics matching specified pattern to get dynamically\ - \ assigned partitions" - oneOf: - - title: "Manually assign a list of partitions" - required: - - "subscription_type" - - "topic_partitions" - properties: - subscription_type: - description: "Manually assign a list of partitions to this consumer.\ - \ This interface does not allow for incremental assignment and will\ - \ replace the previous assignment (if there is one).\nIf the given\ - \ list of topic partitions is empty, it is treated the same as unsubscribe()." - type: "string" - const: "assign" - enum: - - "assign" - default: "assign" - topic_partitions: - title: "List of topic:partition pairs" - type: "string" - examples: - - "sample.topic:0, sample.topic:1" - - title: "Subscribe to all topics matching specified pattern" - required: - - "subscription_type" - - "topic_pattern" - properties: - subscription_type: - description: "Topic pattern from which the records will be read." - type: "string" - const: "subscribe" - enum: - - "subscribe" - default: "subscribe" - topic_pattern: - title: "Topic pattern" - type: "string" - examples: - - "sample.topic" - test_topic: - title: "Test topic" - description: "Topic to test if Airbyte can consume messages." - type: "string" - examples: - - "test.topic" - group_id: - title: "Group ID" - description: "Group id." - type: "string" - examples: - - "group.id" - max_poll_records: - title: "Max poll records" - description: "The maximum number of records returned in a single call to\ - \ poll(). Note, that max_poll_records does not impact the underlying fetching\ - \ behavior. The consumer will cache the records from each fetch request\ - \ and returns them incrementally from each poll." - type: "integer" - default: 500 - protocol: - title: "Protocol" - type: "object" - description: "Protocol used to communicate with brokers." - oneOf: - - title: "PLAINTEXT" - required: - - "security_protocol" - properties: - security_protocol: - type: "string" - enum: - - "PLAINTEXT" - default: "PLAINTEXT" - - title: "SASL PLAINTEXT" - required: - - "security_protocol" - - "sasl_mechanism" - - "sasl_jaas_config" - properties: - security_protocol: - type: "string" - enum: - - "SASL_PLAINTEXT" - default: "SASL_PLAINTEXT" - sasl_mechanism: - title: "SASL mechanism" - description: "SASL mechanism used for client connections. This may\ - \ be any mechanism for which a security provider is available." - type: "string" - default: "PLAIN" - enum: - - "PLAIN" - sasl_jaas_config: - title: "SASL JAAS config" - description: "JAAS login context parameters for SASL connections in\ - \ the format used by JAAS configuration files." 
- type: "string" - default: "" - airbyte_secret: true - - title: "SASL SSL" - required: - - "security_protocol" - - "sasl_mechanism" - - "sasl_jaas_config" - properties: - security_protocol: - type: "string" - enum: - - "SASL_SSL" - default: "SASL_SSL" - sasl_mechanism: - title: "SASL mechanism" - description: "SASL mechanism used for client connections. This may\ - \ be any mechanism for which a security provider is available." - type: "string" - default: "GSSAPI" - enum: - - "GSSAPI" - - "OAUTHBEARER" - - "SCRAM-SHA-256" - sasl_jaas_config: - title: "SASL JAAS config" - description: "JAAS login context parameters for SASL connections in\ - \ the format used by JAAS configuration files." - type: "string" - default: "" - airbyte_secret: true - client_id: - title: "Client ID" - description: "An id string to pass to the server when making requests. The\ - \ purpose of this is to be able to track the source of requests beyond\ - \ just ip/port by allowing a logical application name to be included in\ - \ server-side request logging." - type: "string" - examples: - - "airbyte-consumer" - enable_auto_commit: - title: "Enable auto commit" - description: "If true the consumer's offset will be periodically committed\ - \ in the background." - type: "boolean" - default: true - auto_commit_interval_ms: - title: "Auto commit interval ms" - description: "The frequency in milliseconds that the consumer offsets are\ - \ auto-committed to Kafka if enable.auto.commit is set to true." - type: "integer" - default: 5000 - client_dns_lookup: - title: "Client DNS lookup" - description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ - \ connect to each returned IP address in sequence until a successful connection\ - \ is established. After a disconnection, the next IP is used. Once all\ - \ IPs have been used once, the client resolves the IP(s) from the hostname\ - \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ - \ bootstrap address into a list of canonical names. After the bootstrap\ - \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ - \ attempt to connect to the first IP address returned by the lookup, even\ - \ if the lookup returns multiple IP addresses." - type: "string" - default: "use_all_dns_ips" - enum: - - "default" - - "use_all_dns_ips" - - "resolve_canonical_bootstrap_servers_only" - retry_backoff_ms: - title: "Retry backoff ms" - description: "The amount of time to wait before attempting to retry a failed\ - \ request to a given topic partition. This avoids repeatedly sending requests\ - \ in a tight loop under some failure scenarios." - type: "integer" - default: 100 - request_timeout_ms: - title: "Request timeout ms" - description: "The configuration controls the maximum amount of time the\ - \ client will wait for the response of a request. If the response is not\ - \ received before the timeout elapses the client will resend the request\ - \ if necessary or fail the request if retries are exhausted." - type: "integer" - default: 30000 - receive_buffer_bytes: - title: "Receive buffer bytes" - description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ - \ reading data. If the value is -1, the OS default will be used." 
- type: "integer" - default: 32768 - auto_offset_reset: - title: "Auto offset reset" - description: "What to do when there is no initial offset in Kafka or if\ - \ the current offset does not exist any more on the server - earliest:\ - \ automatically reset the offset to the earliest offset, latest: automatically\ - \ reset the offset to the latest offset, none: throw exception to the\ - \ consumer if no previous offset is found for the consumer's group, anything\ - \ else: throw exception to the consumer." - type: "string" - default: "latest" - enum: - - "latest" - - "earliest" - - "none" - repeated_calls: - title: "Repeated calls" - description: "The number of repeated calls to poll() if no messages were\ - \ received." - type: "integer" - default: 3 - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - supported_source_sync_modes: - - "append" -- dockerImage: "airbyte/source-klaviyo:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" - changelogUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" - connectionSpecification: - title: "Klaviyo Spec" - type: "object" - properties: - api_key: - title: "Api Key" - description: "Klaviyo API Key. See our docs if you need help finding this key." - airbyte_secret: true - type: "string" - start_date: - title: "Start Date" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-25T00:00:00Z" - type: "string" - required: - - "api_key" - - "start_date" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" -- dockerImage: "airbyte/source-lever-hiring:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring" - changelogUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring#changelog" - connectionSpecification: - title: "Lever Hiring Spec" - type: "object" - properties: - client_id: - title: "Client Id" - description: "The client application id as provided when registering the\ - \ application with Lever." - type: "string" - client_secret: - title: "Client Secret" - description: "The application secret as provided when registering the application\ - \ with Lever." - airbyte_secret: true - type: "string" - refresh_token: - title: "Refresh Token" - description: "The refresh token your application will need to submit to\ - \ get a new access token after it's expired." - type: "string" - environment: - title: "Environment" - description: "Sandbox or Production environment." - default: "Production" - enum: - - "Sandbox" - - "Production" - type: "string" - start_date: - title: "Start Date" - description: "UTC date and time in the format 2019-02-25T00:00:00Z. Any\ - \ data before this date will not be replicated." 
- pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2021-04-25T00:00:00Z" - type: "string" - required: - - "client_id" - - "client_secret" - - "refresh_token" - - "start_date" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - - - "refresh_token" - oauthFlowOutputParameters: [] -- dockerImage: "airbyte/source-linkedin-ads:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/linkedin-ads" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Linkedin Ads Spec" - type: "object" - required: - - "start_date" - - "access_token" - additionalProperties: false - properties: - start_date: - type: "string" - title: "Start Date" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - description: "Date in the format 2020-09-17. Any data before this date will\ - \ not be replicated." - examples: - - "2021-05-17" - access_token: - type: "string" - title: "Access Token" - description: "The token value ganerated using Auth Code" - airbyte_secret: true - account_ids: - title: "Account IDs" - type: "array" - description: "Specify the Account IDs separated by space, from which to\ - \ pull the data. Leave empty to pull from all associated accounts." - items: - type: "integer" - default: [] - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-looker:0.2.5" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/looker" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Looker Spec" - type: "object" - required: - - "domain" - - "client_id" - - "client_secret" - additionalProperties: false - properties: - domain: - type: "string" - examples: - - "domainname.looker.com" - - "looker.clientname.com" - - "123.123.124.123:8000" - description: "Domain for your Looker account, e.g. airbyte.cloud.looker.com,looker.[clientname].com,IP\ - \ address" - client_id: - title: "Client ID" - type: "string" - description: "The Client ID is first part of an API3 key that is specific\ - \ to each Looker user. See the docs for more information on how to generate this key." - client_secret: - title: "Client Secret" - type: "string" - description: "The Client Secret is second part of an API3 key." - run_look_ids: - title: "Look IDs to Run" - type: "array" - items: - type: "string" - pattern: "^[0-9]*$" - description: "The IDs of any Looks to run (optional)" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mailchimp:0.2.8" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/mailchimp" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Mailchimp Spec" - type: "object" - required: - - "username" - - "apikey" - additionalProperties: false - properties: - username: - type: "string" - description: "The Username or email you use to sign into Mailchimp" - apikey: - type: "string" - airbyte_secret: true - description: "API Key. See the docs for information on how to generate this key." 
- supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-marketo:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/marketo" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Marketo Spec" - type: "object" - required: - - "domain_url" - - "client_id" - - "client_secret" - - "start_date" - additionalProperties: false - properties: - domain_url: - type: "string" - description: "Your Marketo Base URL. See the docs for info on how to obtain this." - examples: - - "https://000-AAA-000.mktorest.com" - airbyte_secret: true - client_id: - type: "string" - description: "Your Marketo client_id. See the docs for info on how to obtain this." - airbyte_secret: true - client_secret: - type: "string" - description: "Your Marketo client secret. See the docs for info on how to obtain this." - airbyte_secret: true - start_date: - type: "string" - description: "Data generated in Marketo after this date will be replicated.\ - \ This date must be specified in the format YYYY-MM-DDT00:00:00Z." - examples: - - "2020-09-25T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - window_in_days: - type: "integer" - description: "The amount of days for each data-chunk begining from start_date.\ - \ (Min=1, as for a Day; Max=30, as for a Month)." - examples: - - 1 - - 5 - - 10 - - 15 - - 30 - default: 30 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mssql:0.3.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MSSQL Source Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - additionalProperties: false - properties: - host: - description: "Hostname of the database." - type: "string" - port: - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - examples: - - "1433" - database: - description: "Name of the database." - type: "string" - examples: - - "master" - username: - description: "Username to use to access the database." - type: "string" - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - ssl_method: - title: "SSL Method" - type: "object" - description: "Encryption method to use when communicating with the database" - order: 6 - oneOf: - - title: "Unencrypted" - additionalProperties: false - description: "Data transfer will not be encrypted." - required: - - "ssl_method" - properties: - ssl_method: - type: "string" - const: "unencrypted" - enum: - - "unencrypted" - default: "unencrypted" - - title: "Encrypted (trust server certificate)" - additionalProperties: false - description: "Use the cert provided by the server without verification.\ - \ (For testing purposes only!)" - required: - - "ssl_method" - properties: - ssl_method: - type: "string" - const: "encrypted_trust_server_certificate" - enum: - - "encrypted_trust_server_certificate" - default: "encrypted_trust_server_certificate" - - title: "Encrypted (verify certificate)" - additionalProperties: false - description: "Verify and use the cert provided by the server." 
- required: - - "ssl_method" - - "trustStoreName" - - "trustStorePassword" - properties: - ssl_method: - type: "string" - const: "encrypted_verify_certificate" - enum: - - "encrypted_verify_certificate" - default: "encrypted_verify_certificate" - hostNameInCertificate: - title: "Host Name In Certificate" - type: "string" - description: "Specifies the host name of the server. The value of\ - \ this property must match the subject property of the certificate." - order: 7 - replication_method: - type: "string" - title: "Replication Method" - description: "Replication method to use for extracting data from the database.\ - \ STANDARD replication requires no setup on the DB side but will not be\ - \ able to represent deletions incrementally. CDC uses {TBC} to detect\ - \ inserts, updates, and deletes. This needs to be configured on the source\ - \ database itself." - default: "STANDARD" - enum: - - "STANDARD" - - "CDC" - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials for logging\ - \ into the jump server host." - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." 
- type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-microsoft-teams:0.2.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/microsoft-teams" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Microsoft Teams Spec" - type: "object" - required: - - "tenant_id" - - "client_id" - - "client_secret" - - "period" - additionalProperties: false - properties: - tenant_id: - title: "Directory (tenant) ID" - type: "string" - description: "Directory (tenant) ID" - client_id: - title: "Application (client) ID" - type: "string" - description: "Application (client) ID" - client_secret: - title: "Client Secret" - type: "string" - description: "Client secret" - airbyte_secret: true - period: - type: "string" - description: "Specifies the length of time over which the Team Device Report\ - \ stream is aggregated. The supported values are: D7, D30, D90, and D180." - examples: - - "D7" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mixpanel:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/mixpanel" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Mixpanel Spec" - type: "object" - required: - - "api_secret" - additionalProperties: true - properties: - api_secret: - type: "string" - description: "Mixpanel API Secret. See the docs for more information on how to obtain this key." - airbyte_secret: true - attribution_window: - type: "integer" - description: "Latency minimum number of days to look-back to account for\ - \ delays in attributing accurate results. Default attribution window is\ - \ 5 days." - default: 5 - date_window_size: - type: "integer" - description: "Number of days for date window looping through transactional\ - \ endpoints with from_date and to_date. Default date_window_size is 30\ - \ days. Clients with large volumes of events may want to decrease this\ - \ to 14, 7, or even down to 1-2 days." - default: 30 - project_timezone: - type: "string" - description: "Time zone in which integer date times are stored. The project\ - \ timezone may be found in the project settings in the Mixpanel console." - default: "US/Pacific" - examples: - - "US/Pacific" - - "UTC" - select_properties_by_default: - type: "boolean" - description: "Setting this config parameter to true ensures that new properties\ - \ on events and engage records are captured. Otherwise new properties\ - \ will be ignored" - default: true - start_date: - type: "string" - description: "The default value to use if no bookmark exists for an endpoint.\ - \ Default is 1 year ago." 
- examples: - - "2021-11-16" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)?$" - region: - type: "string" - enum: - - "US" - - "EU" - default: "US" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mongodb-v2:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" - changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MongoDb Source Spec" - type: "object" - required: - - "database" - additionalProperties: true - properties: - instance_type: - type: "object" - title: "MongoDb instance type" - description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ - \ Set TLS connection is used by default." - order: 0 - oneOf: - - title: "Standalone MongoDb Instance" - required: - - "instance" - - "host" - - "port" - properties: - instance: - type: "string" - enum: - - "standalone" - default: "standalone" - host: - title: "Host" - type: "string" - description: "Host of a Mongo database to be replicated." - order: 0 - port: - title: "Port" - type: "integer" - description: "Port of a Mongo database to be replicated." - minimum: 0 - maximum: 65536 - default: 27017 - examples: - - "27017" - order: 1 - tls: - title: "TLS connection" - type: "boolean" - description: "Indicates whether TLS encryption protocol will be used\ - \ to connect to MongoDB. It is recommended to use TLS connection\ - \ if possible. For more information see documentation." - default: false - order: 2 - - title: "Replica Set" - required: - - "instance" - - "server_addresses" - properties: - instance: - type: "string" - enum: - - "replica" - default: "replica" - server_addresses: - title: "Server addresses" - type: "string" - description: "The members of a replica set. Please specify `host`:`port`\ - \ of each member seperated by comma." - examples: - - "host1:27017,host2:27017,host3:27017" - order: 0 - replica_set: - title: "Replica Set" - type: "string" - description: "A replica set name." - order: 1 - - title: "MongoDB Atlas" - additionalProperties: false - required: - - "instance" - - "cluster_url" - properties: - instance: - type: "string" - enum: - - "atlas" - default: "atlas" - cluster_url: - title: "Cluster URL" - type: "string" - description: "URL of a cluster to connect to." - order: 0 - database: - title: "Database name" - type: "string" - description: "Database to be replicated." - order: 1 - user: - title: "User" - type: "string" - description: "User" - order: 2 - password: - title: "Password" - type: "string" - description: "Password" - airbyte_secret: true - order: 3 - auth_source: - title: "Authentication source" - type: "string" - description: "Authentication source where user information is stored" - default: "admin" - examples: - - "admin" - order: 4 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mysql:0.4.8" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/mysql" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "MySql Source Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - - "replication_method" - additionalProperties: false - properties: - host: - description: "Hostname of the database." - type: "string" - order: 0 - port: - description: "Port of the database." 
- type: "integer" - minimum: 0 - maximum: 65536 - default: 3306 - examples: - - "3306" - order: 1 - database: - description: "Name of the database." - type: "string" - order: 2 - username: - description: "Username to use to access the database." - type: "string" - order: 3 - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 4 - jdbc_url_params: - description: "Additional properties to pass to the jdbc url string when\ - \ connecting to the database formatted as 'key=value' pairs separated\ - \ by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)" - type: "string" - order: 5 - ssl: - title: "SSL Connection" - description: "Encrypt data using SSL." - type: "boolean" - default: true - order: 7 - replication_method: - type: "string" - title: "Replication Method" - description: "Replication method to use for extracting data from the database.\ - \ STANDARD replication requires no setup on the DB side but will not be\ - \ able to represent deletions incrementally. CDC uses the Binlog to detect\ - \ inserts, updates, and deletes. This needs to be configured on the source\ - \ database itself." - order: 6 - default: "STANDARD" - enum: - - "STANDARD" - - "CDC" - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials in RSA PEM\ - \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." 
- type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-okta:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/okta" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Okta Spec" - type: "object" - required: - - "token" - - "base_url" - additionalProperties: false - properties: - token: - type: "string" - title: "API Token" - description: "A Okta token. See the docs for instructions on how to generate it." - airbyte_secret: true - base_url: - type: "string" - title: "Base URL" - description: "The Okta base URL." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-onesignal:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/onesignal" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "OneSignal Source Spec" - type: "object" - required: - - "user_auth_key" - - "start_date" - - "outcome_names" - additionalProperties: false - properties: - user_auth_key: - type: "string" - description: "OneSignal User Auth Key, see the docs for more information on how to obtain this key." - airbyte_secret: true - start_date: - type: "string" - description: "The date from which you'd like to replicate data for OneSignal\ - \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ - \ date will be replicated." - examples: - - "2020-11-16T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - outcome_names: - type: "string" - description: "Comma-separated list of names and the value (sum/count) for\ - \ the returned outcome data. See the docs for more details" - examples: - - "os__session_duration.count,os__click.count,CustomOutcomeName.sum" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-oracle:0.3.8" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/oracle" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Oracle Source Spec" - type: "object" - required: - - "host" - - "port" - - "sid" - - "username" - additionalProperties: false - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - port: - title: "Port" - description: "Port of the database.\nOracle Corporations recommends the\ - \ following port numbers:\n1521 - Default listening port for client connections\ - \ to the listener. \n2484 - Recommended and officially registered listening\ - \ port for client connections to the listener using TCP/IP with SSL" - type: "integer" - minimum: 0 - maximum: 65536 - default: 1521 - sid: - title: "SID (Oracle System Identifier)" - type: "string" - username: - title: "User" - description: "Username to use to access the database." - type: "string" - password: - title: "Password" - description: "Password associated with the username." 
- type: "string" - airbyte_secret: true - schemas: - title: "Schemas" - description: "List of schemas to sync from. Defaults to user. Case sensitive." - type: "array" - items: - type: "string" - minItems: 1 - uniqueItems: true - encryption: - title: "Encryption" - type: "object" - description: "Encryption method to use when communicating with the database" - order: 6 - oneOf: - - title: "Unencrypted" - additionalProperties: false - description: "Data transfer will not be encrypted." - required: - - "encryption_method" - properties: - encryption_method: - type: "string" - const: "unencrypted" - enum: - - "unencrypted" - default: "unencrypted" - - title: "Native Network Ecryption (NNE)" - additionalProperties: false - description: "Native network encryption gives you the ability to encrypt\ - \ database connections, without the configuration overhead of TCP/IP\ - \ and SSL/TLS and without the need to open and listen on different ports." - required: - - "encryption_method" - properties: - encryption_method: - type: "string" - const: "client_nne" - enum: - - "client_nne" - default: "client_nne" - encryption_algorithm: - type: "string" - description: "This parameter defines the encryption algorithm to be\ - \ used" - title: "Encryption Algorithm" - default: "AES256" - enum: - - "AES256" - - "RC4_56" - - "3DES168" - - title: "TLS Encrypted (verify certificate)" - additionalProperties: false - description: "Verify and use the cert provided by the server." - required: - - "encryption_method" - - "ssl_certificate" - properties: - encryption_method: - type: "string" - const: "encrypted_verify_certificate" - enum: - - "encrypted_verify_certificate" - default: "encrypted_verify_certificate" - ssl_certificate: - title: "SSL PEM file" - description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ - \ containers frequently used in certificate installations" - type: "string" - airbyte_secret: true - multiline: true - order: 4 - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." 
- type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials in RSA PEM\ - \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-paypal-transaction:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/paypal-transactions" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Paypal Transaction Search" - type: "object" - required: - - "client_id" - - "secret" - - "start_date" - - "is_sandbox" - additionalProperties: true - properties: - client_id: - title: "Client ID" - type: "string" - description: "The Paypal Client ID for API credentials" - secret: - title: "Secret" - type: "string" - description: "The Secret for a given Client ID." - airbyte_secret: true - start_date: - type: "string" - title: "Start Date" - description: "Start Date for data extraction in ISO format. 
Date must be in range from 3 years till 12 hrs before\ - \ present time" - examples: - - "2021-06-11T23:59:59-00:00" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}$" - is_sandbox: - title: "Is Sandbox" - description: "Whether or not to Sandbox or Production environment to extract\ - \ data from" - type: "boolean" - default: false - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-pipedrive:0.1.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/pipedrive" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Pipedrive Spec" - type: "object" - required: - - "replication_start_date" - additionalProperties: true - properties: - authorization: - type: "object" - title: "Authentication Type" - oneOf: - - title: "Sign in via Pipedrive (OAuth)" - type: "object" - required: - - "auth_type" - - "client_id" - - "client_secret" - - "refresh_token" - properties: - auth_type: - type: "string" - const: "Client" - enum: - - "Client" - default: "Client" - order: 0 - client_id: - title: "Client ID" - type: "string" - description: "The Client ID of your developer application" - airbyte_secret: true - client_secret: - title: "Client Secret" - type: "string" - description: "The client secret of your developer application" - airbyte_secret: true - access_token: - title: "Access Token" - type: "string" - description: "An access token generated using the above client ID\ - \ and secret" - airbyte_secret: true - refresh_token: - title: "Refresh Token" - type: "string" - description: "A refresh token generated using the above client ID\ - \ and secret" - airbyte_secret: true - - type: "object" - title: "API Key Authentication" - required: - - "auth_type" - - "api_token" - properties: - auth_type: - type: "string" - const: "Token" - enum: - - "Token" - default: "Token" - order: 0 - api_token: - title: "API Token" - type: "string" - description: "Pipedrive API Token" - airbyte_secret: true - replication_start_date: - title: "Replication Start Date" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated. When specified and not\ - \ None, then stream will behave as incremental" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2017-01-25T00:00:00Z" - type: "string" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" -- dockerImage: "airbyte/source-plaid:0.2.1" - spec: - documentationUrl: "https://plaid.com/docs/api/" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - type: "object" - required: - - "access_token" - - "api_key" - - "client_id" - additionalProperties: false - properties: - access_token: - type: "string" - title: "Access Token" - description: "The end-user's Link access token." - api_key: - title: "API Key" - type: "string" - description: "The Plaid API key to use to hit the API." 
- airbyte_secret: true - client_id: - title: "Client ID" - type: "string" - description: "The Plaid client id" - plaid_env: - title: "Plaid Environment" - type: "string" - enum: - - "sandbox" - - "development" - - "production" - description: "The Plaid environment" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-pokeapi:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/pokeapi" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Pokeapi Spec" - type: "object" - required: - - "pokemon_name" - additionalProperties: false - properties: - pokemon_name: - type: "string" - description: "Pokemon requested from the API." - pattern: "^[a-z0-9_\\-]+$" - examples: - - "ditto, luxray, snorlax" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-posthog:0.1.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/posthog" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "PostHog Spec" - type: "object" - required: - - "api_key" - - "start_date" - additionalProperties: false - properties: - start_date: - title: "Start Date" - type: "string" - description: "The date from which you'd like to replicate the data" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2021-01-01T00:00:00Z" - api_key: - type: "string" - airbyte_secret: true - description: "API Key. See the docs for information on how to generate this key." - base_url: - type: "string" - default: "https://app.posthog.com" - description: "Base PostHog url. Defaults to PostHog Cloud (https://app.posthog.com)." - examples: - - "https://posthog.example.com" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-postgres:0.3.11" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Postgres Source Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - additionalProperties: false - properties: - host: - title: "Host" - description: "Hostname of the database." - type: "string" - order: 0 - port: - title: "Port" - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 5432 - examples: - - "5432" - order: 1 - database: - title: "DB Name" - description: "Name of the database." - type: "string" - order: 2 - username: - title: "User" - description: "Username to use to access the database." - type: "string" - order: 3 - password: - title: "Password" - description: "Password associated with the username." - type: "string" - airbyte_secret: true - order: 4 - ssl: - title: "Connect using SSL" - description: "Encrypt client/server communications for increased security." - type: "boolean" - default: false - order: 5 - replication_method: - type: "object" - title: "Replication Method" - description: "Replication method to use for extracting data from the database." - order: 6 - oneOf: - - title: "Standard" - additionalProperties: false - description: "Standard replication requires no setup on the DB side but\ - \ will not be able to represent deletions incrementally." 
- required: - - "method" - properties: - method: - type: "string" - const: "Standard" - enum: - - "Standard" - default: "Standard" - order: 0 - - title: "Logical Replication (CDC)" - additionalProperties: false - description: "Logical replication uses the Postgres write-ahead log (WAL)\ - \ to detect inserts, updates, and deletes. This needs to be configured\ - \ on the source database itself. Only available on Postgres 10 and above.\ - \ Read the Postgres Source docs for more information." - required: - - "method" - - "replication_slot" - - "publication" - properties: - method: - type: "string" - const: "CDC" - enum: - - "CDC" - default: "CDC" - order: 0 - plugin: - type: "string" - description: "A logical decoding plug-in installed on the PostgreSQL\ - \ server. `pgoutput` plug-in is used by default.\nIf replication\ - \ table contains a lot of big jsonb values it is recommended to\ - \ use `wal2json` plug-in. For more information about `wal2json`\ - \ plug-in read Postgres Source docs." - enum: - - "pgoutput" - - "wal2json" - default: "pgoutput" - order: 1 - replication_slot: - type: "string" - description: "A plug-in logical replication slot." - order: 2 - publication: - type: "string" - description: "A Postgres publication used for consuming changes." - order: 3 - tunnel_method: - type: "object" - title: "SSH Tunnel Method" - description: "Whether to initiate an SSH tunnel before connecting to the\ - \ database, and if so, which kind of authentication to use." - oneOf: - - title: "No Tunnel" - required: - - "tunnel_method" - properties: - tunnel_method: - description: "No ssh tunnel needed to connect to database" - type: "string" - const: "NO_TUNNEL" - order: 0 - - title: "SSH Key Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "ssh_key" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and ssh key" - type: "string" - const: "SSH_KEY_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." - type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host." - type: "string" - order: 3 - ssh_key: - title: "SSH Private Key" - description: "OS-level user account ssh key credentials for logging\ - \ into the jump server host." - type: "string" - airbyte_secret: true - multiline: true - order: 4 - - title: "Password Authentication" - required: - - "tunnel_method" - - "tunnel_host" - - "tunnel_port" - - "tunnel_user" - - "tunnel_user_password" - properties: - tunnel_method: - description: "Connect through a jump server tunnel host using username\ - \ and password authentication" - type: "string" - const: "SSH_PASSWORD_AUTH" - order: 0 - tunnel_host: - title: "SSH Tunnel Jump Server Host" - description: "Hostname of the jump server host that allows inbound\ - \ ssh tunnel." - type: "string" - order: 1 - tunnel_port: - title: "SSH Connection Port" - description: "Port on the proxy/jump server that accepts inbound ssh\ - \ connections." 
- type: "integer" - minimum: 0 - maximum: 65536 - default: 22 - examples: - - "22" - order: 2 - tunnel_user: - title: "SSH Login Username" - description: "OS-level username for logging into the jump server host" - type: "string" - order: 3 - tunnel_user_password: - title: "Password" - description: "OS-level password for logging into the jump server host" - type: "string" - airbyte_secret: true - order: 4 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-prestashop:0.1.0" - spec: - documentationUrl: "https://docsurl.com" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "PrestaShop Spec" - type: "object" - required: - - "url" - - "access_key" - additionalProperties: false - properties: - url: - type: "string" - description: "Shop URL without trailing slash (domain name or IP address)" - access_key: - type: "string" - description: "Your PrestaShop access key. See the docs for info on how to obtain this." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-quickbooks-singer:0.1.3" - spec: - documentationUrl: "https://docsurl.com" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Quickbooks Singer Spec" - type: "object" - required: - - "client_id" - - "client_secret" - - "refresh_token" - - "realm_id" - - "user_agent" - - "start_date" - - "sandbox" - additionalProperties: false - properties: - client_id: - type: "string" - description: "Identifies which app is making the request. Obtain this value\ - \ from the Keys tab on the app profile via My Apps on the developer site.\ - \ There are two versions of this key: development and production" - client_secret: - description: " Obtain this value from the Keys tab on the app profile via\ - \ My Apps on the developer site. There are two versions of this key: development\ - \ and production" - type: "string" - airbyte_secret: true - refresh_token: - description: "A token used when refreshing the access token." - type: "string" - airbyte_secret: true - realm_id: - description: "Labeled Company ID. The Make API Calls panel is populated\ - \ with the realm id and the current access token" - type: "string" - airbyte_secret: true - user_agent: - type: "string" - description: "Process and email for API logging purposes. Example: tap-quickbooks\ - \ " - start_date: - description: "The default value to use if no bookmark exists for an endpoint\ - \ (rfc3339 date string) E.g, 2021-03-20T00:00:00Z" - type: "string" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2021-03-20T00:00:00Z" - sandbox: - description: "Development or Production." - type: "boolean" - default: false - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-recharge:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/recharge" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Recharge Spec" - type: "object" - required: - - "start_date" - - "access_token" - additionalProperties: false - properties: - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Recharge\ - \ API, in the format YYYY-MM-DDT00:00:00Z." 
- examples: - - "2021-05-14T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - access_token: - type: "string" - description: "The value of the Access Token generated. See the docs for more\ - \ information" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-recurly:0.2.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/recurly" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Recurly Source Spec" - type: "object" - required: - - "api_key" - additionalProperties: false - properties: - api_key: - type: "string" - description: "Recurly API Key. See the docs for more information on how to generate this key." - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-redshift:0.3.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Redshift Source Spec" - type: "object" - required: - - "host" - - "port" - - "database" - - "username" - - "password" - additionalProperties: false - properties: - host: - description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ - \ region and end with .redshift.amazonaws.com)" - type: "string" - port: - description: "Port of the database." - type: "integer" - minimum: 0 - maximum: 65536 - default: 5439 - examples: - - "5439" - database: - description: "Name of the database." - type: "string" - examples: - - "master" - username: - description: "Username to use to access the database." - type: "string" - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-s3:0.1.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" - changelogUrl: "https://docs.airbyte.io/integrations/sources/s3" - connectionSpecification: - title: "S3 Source Spec" - type: "object" - properties: - dataset: - title: "Dataset" - description: "This source creates one table per connection, this field is\ - \ the name of that table. This should include only letters, numbers, dash\ - \ and underscores. Note that this may be altered according to destination." - pattern: "^([A-Za-z0-9-_]+)$" - type: "string" - path_pattern: - title: "Path Pattern" - description: "Add at least 1 pattern here to match filepaths against. Use\ - \ | to separate multiple patterns. Airbyte uses these patterns to determine\ - \ which files to pick up from the provider storage. See wcmatch.glob to understand pattern syntax (GLOBSTAR\ - \ and SPLIT flags are enabled). Use pattern ** to pick\ - \ up all files." - examples: - - "**" - - "myFolder/myTableFiles/*.csv|myFolder/myOtherTableFiles/*.csv" - type: "string" - schema: - title: "Schema" - description: "Optionally provide a schema to enforce, as a valid JSON string.\ - \ Ensure this is a mapping of { \"column\" : \"type\" },\ - \ where types are valid JSON Schema datatypes. Leave as {} to auto-infer\ - \ the schema." 
- default: "{}" - examples: - - "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"\ - array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}" - type: "string" - format: - title: "Format" - default: "csv" - type: "object" - oneOf: - - title: "csv" - description: "This connector utilises PyArrow (Apache Arrow) for CSV parsing." - type: "object" - properties: - filetype: - title: "Filetype" - const: "csv" - type: "string" - delimiter: - title: "Delimiter" - description: "The character delimiting individual cells in the CSV\ - \ data. This may only be a 1-character string." - default: "," - minLength: 1 - type: "string" - quote_char: - title: "Quote Char" - description: "The character used optionally for quoting CSV values.\ - \ To disallow quoting, make this field blank." - default: "\"" - type: "string" - escape_char: - title: "Escape Char" - description: "The character used optionally for escaping special characters.\ - \ To disallow escaping, leave this field blank." - type: "string" - encoding: - title: "Encoding" - description: "The character encoding of the CSV data. Leave blank\ - \ to default to UTF-8. See list of python encodings for allowable options." - type: "string" - double_quote: - title: "Double Quote" - description: "Whether two quotes in a quoted CSV value denote a single\ - \ quote in the data." - default: true - type: "boolean" - newlines_in_values: - title: "Newlines In Values" - description: "Whether newline characters are allowed in CSV values.\ - \ Turning this on may affect performance. Leave blank to default\ - \ to False." - default: false - type: "boolean" - block_size: - title: "Block Size" - description: "The chunk size in bytes to process at a time in memory\ - \ from each file. If your data is particularly wide and failing\ - \ during schema detection, increasing this should solve it. Beware\ - \ of raising this too high as you could hit OOM errors." - default: 10000 - type: "integer" - additional_reader_options: - title: "Additional Reader Options" - description: "Optionally add a valid JSON string here to provide additional\ - \ options to the csv reader. Mappings must correspond to options\ - \ detailed here. 'column_types' is used internally\ - \ to handle schema so overriding that would likely cause problems." - default: "{}" - examples: - - "{\"timestamp_parsers\": [\"%m/%d/%Y %H:%M\", \"%Y/%m/%d %H:%M\"\ - ], \"strings_can_be_null\": true, \"null_values\": [\"NA\", \"NULL\"\ - ]}" - type: "string" - advanced_options: - title: "Advanced Options" - description: "Optionally add a valid JSON string here to provide additional\ - \ Pyarrow ReadOptions. Specify 'column_names'\ - \ here if your CSV doesn't have header, or if you want to use custom\ - \ column names. 'block_size' and 'encoding' are already used above,\ - \ specify them again here will override the values above." - default: "{}" - examples: - - "{\"column_names\": [\"column1\", \"column2\"]}" - type: "string" - - title: "parquet" - description: "This connector utilises PyArrow (Apache Arrow) for Parquet parsing." - type: "object" - properties: - filetype: - title: "Filetype" - const: "parquet" - type: "string" - buffer_size: - title: "Buffer Size" - description: "Perform read buffering when deserializing individual\ - \ column chunks. By default every group column will be loaded fully\ - \ to memory. This option can help to optimize a work with memory\ - \ if your data is particularly wide or failing during detection\ - \ of OOM errors." 
- default: 0 - type: "integer" - columns: - title: "Columns" - description: "If you only want to sync a subset of the columns from\ - \ the file(s), add the columns you want here. Leave it empty to\ - \ sync all columns." - type: "array" - items: - type: "string" - batch_size: - title: "Batch Size" - description: "Maximum number of records per batch. Batches may be\ - \ smaller if there aren’t enough rows in the file. This option can\ - \ help to optimize a work with memory if your data is particularly\ - \ wide or failing during detection of OOM errors." - default: 65536 - type: "integer" - provider: - title: "S3: Amazon Web Services" - type: "object" - properties: - bucket: - title: "Bucket" - description: "Name of the S3 bucket where the file(s) exist." - type: "string" - aws_access_key_id: - title: "Aws Access Key Id" - description: "In order to access private Buckets stored on AWS S3, this\ - \ connector requires credentials with the proper permissions. If accessing\ - \ publicly available data, this field is not necessary." - airbyte_secret: true - type: "string" - aws_secret_access_key: - title: "Aws Secret Access Key" - description: "In order to access private Buckets stored on AWS S3, this\ - \ connector requires credentials with the proper permissions. If accessing\ - \ publicly available data, this field is not necessary." - airbyte_secret: true - type: "string" - path_prefix: - title: "Path Prefix" - description: "By providing a path-like prefix (e.g. myFolder/thisTable/)\ - \ under which all the relevant files sit, we can optimise finding\ - \ these in S3. This is optional but recommended if your bucket contains\ - \ many folders/files." - default: "" - type: "string" - endpoint: - title: "Endpoint" - description: "Endpoint to an S3 compatible service. Leave empty to use\ - \ AWS." - default: "" - type: "string" - use_ssl: - title: "Use Ssl" - description: "Is remote server using secure SSL/TLS connection" - type: "boolean" - verify_ssl_cert: - title: "Verify Ssl Cert" - description: "Allow self signed certificates" - type: "boolean" - required: - - "bucket" - required: - - "dataset" - - "path_pattern" - - "provider" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/source-salesloft:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/salesloft" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Salesloft Spec" - type: "object" - required: - - "client_id" - - "client_secret" - - "refresh_token" - - "start_date" - additionalProperties: false - properties: - client_id: - type: "string" - description: "Salesloft client id." - client_secret: - type: "string" - description: "Salesloft client secret." - airbyte_secret: true - refresh_token: - type: "string" - description: "Salesloft refresh token." - airbyte_secret: true - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Salesloft\ - \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ - \ date will be replicated." 
- examples: - - "2020-11-16T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Salesforce Source Spec" - type: "object" - required: - - "client_id" - - "client_secret" - - "refresh_token" - - "start_date" - - "api_type" - additionalProperties: false - properties: - client_id: - description: "The Consumer Key that can be found when viewing your app in\ - \ Salesforce" - type: "string" - client_secret: - description: "The Consumer Secret that can be found when viewing your app\ - \ in Salesforce" - type: "string" - airbyte_secret: true - refresh_token: - description: "Salesforce Refresh Token used for Airbyte to access your Salesforce\ - \ account. If you don't know what this is, follow this guide to retrieve it." - type: "string" - airbyte_secret: true - start_date: - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - type: "string" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2021-07-25T00:00:00Z" - is_sandbox: - description: "Whether or not the the app is in a Salesforce sandbox. If\ - \ you do not know what this, assume it is false. We provide more info\ - \ on this field in the docs." - type: "boolean" - default: false - api_type: - description: "Unless you know that you are transferring a very small amount\ - \ of data, prefer using the BULK API. This will help avoid using up all\ - \ of your API call quota with Salesforce. Valid values are BULK or REST." - type: "string" - enum: - - "BULK" - - "REST" - default: "BULK" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "refresh_token" -- dockerImage: "airbyte/source-sendgrid:0.2.6" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/sendgrid" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Sendgrid Spec" - type: "object" - required: - - "apikey" - additionalProperties: false - properties: - apikey: - type: "string" - description: "API Key, use admin to generate this key." - start_time: - type: "integer" - description: "Start time in timestamp integer format. Any data before this\ - \ timestamp will not be replicated." - examples: - - 1558359837 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-shopify:0.1.21" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Shopify Source CDK Specifications" - type: "object" - required: - - "shop" - - "start_date" - - "auth_method" - additionalProperties: false - properties: - shop: - type: "string" - description: "The name of the shopify store. For https://EXAMPLE.myshopify.com,\ - \ the shop name is 'EXAMPLE'." - start_date: - type: "string" - description: "The date you would like to replicate data. Format: YYYY-MM-DD." 
- examples: - - "2021-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - auth_method: - title: "Shopify Authorization Method" - type: "object" - oneOf: - - type: "object" - title: "OAuth2.0" - required: - - "client_id" - - "client_secret" - - "access_token" - properties: - auth_method: - type: "string" - const: "access_token" - enum: - - "access_token" - default: "access_token" - order: 0 - client_id: - type: "string" - description: "The API Key of the Shopify developer application." - airbyte_secret: true - client_secret: - type: "string" - description: "The API Secret the Shopify developer application." - airbyte_secret: true - access_token: - type: "string" - description: "Access Token for making authenticated requests." - airbyte_secret: true - - title: "API Password" - type: "object" - required: - - "api_password" - properties: - auth_method: - type: "string" - const: "api_password" - enum: - - "api_password" - default: "api_password" - order: 0 - api_password: - type: "string" - description: "The API PASSWORD for your private application in `Shopify`\ - \ shop." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "auth_method" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "access_token" -- dockerImage: "airbyte/source-shortio:0.1.0" - spec: - documentationUrl: "https://developers.short.io/reference" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Shortio Spec" - type: "object" - required: - - "domain_id" - - "secret_key" - - "start_date" - additionalProperties: false - properties: - domain_id: - type: "string" - description: "Domain ID" - airbyte_secret: false - secret_key: - type: "string" - description: "Short.io Secret key" - airbyte_secret: true - start_date: - type: "string" - description: "Start Date, YYYY-MM-DD" - airbyte_secret: false - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-slack:0.1.12" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/slack" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Slack Spec" - type: "object" - required: - - "start_date" - - "lookback_window" - - "join_channels" - additionalProperties: true - properties: - start_date: - type: "string" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2017-01-25T00:00:00Z" - title: "Start Date" - lookback_window: - type: "integer" - title: "Threads Lookback window (Days)" - description: "How far into the past to look for messages in threads." - examples: - - 7 - - 14 - join_channels: - type: "boolean" - default: true - title: "Join all channels" - description: "Whether to join all channels or to sync data only from channels\ - \ the bot is already in. If false, you'll need to manually add the bot\ - \ to all the channels from which you'd like to sync messages. 
" - credentials: - title: "Authentication mechanism" - description: "Choose how to authenticate into Slack" - type: "object" - oneOf: - - type: "object" - title: "Sign in via Slack (OAuth)" - required: - - "access_token" - - "client_id" - - "client_secret" - - "option_title" - properties: - option_title: - type: "string" - const: "Default OAuth2.0 authorization" - client_id: - title: "Client ID" - description: "Slack client_id. See our docs if you need help finding this id." - type: "string" - examples: - - "slack-client-id-example" - client_secret: - title: "Client Secret" - description: "Slack client_secret. See our docs if you need help finding this secret." - type: "string" - examples: - - "slack-client-secret-example" - airbyte_secret: true - access_token: - title: "Access token" - description: "Slack access_token. See our docs if you need help generating the token." - type: "string" - examples: - - "slack-access-token-example" - airbyte_secret: true - refresh_token: - title: "Refresh token" - description: "Slack refresh_token. See our docs if you need help generating the token." - type: "string" - examples: - - "slack-refresh-token-example" - airbyte_secret: true - order: 0 - - type: "object" - title: "API Token" - required: - - "api_token" - - "option_title" - properties: - option_title: - type: "string" - const: "API Token Credentials" - api_token: - type: "string" - title: "API Token" - description: "A Slack bot token. See the docs for instructions on how to generate it." - airbyte_secret: true - order: 1 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "access_token" - - - "refresh_token" -- dockerImage: "airbyte/source-smartsheets:0.1.5" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/smartsheets" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Smartsheets Source Spec" - type: "object" - required: - - "access_token" - - "spreadsheet_id" - additionalProperties: false - properties: - access_token: - title: "API Access token" - description: "Found in Profile > Apps & Integrations > API Access within\ - \ Smartsheet app" - type: "string" - airbyte_secret: true - spreadsheet_id: - title: "Smartsheet ID" - description: "Found in File > Properties" - type: "string" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-snapchat-marketing:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/snapchat-marketing" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Snapchat Marketing Spec" - type: "object" - required: - - "client_id" - - "client_secret" - - "refresh_token" - additionalProperties: false - properties: - client_id: - title: "Client ID" - type: "string" - description: "The Snapchat Client ID for API credentials." - airbyte_secret: true - client_secret: - title: "Client Secret" - type: "string" - description: "The Client Secret for a given Client ID." - airbyte_secret: true - refresh_token: - title: "API Refresh Token" - type: "string" - description: "Refresh Token to get next api key after expiration. 
Is given\ - \ with API Key" - airbyte_secret: true - start_date: - title: "Start Date" - type: "string" - description: "The start date to sync data. Leave blank for full sync. Format:\ - \ YYYY-MM-DD." - examples: - - "2021-01-01" - default: "1970-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-snowflake:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Snowflake Source Spec" - type: "object" - required: - - "host" - - "role" - - "warehouse" - - "database" - - "schema" - - "username" - - "password" - additionalProperties: false - properties: - host: - description: "Host domain of the snowflake instance (must include the account,\ - \ region, cloud environment, and end with snowflakecomputing.com)." - examples: - - "accountname.us-east-2.aws.snowflakecomputing.com" - type: "string" - title: "Account name" - order: 0 - role: - description: "The role you created for Airbyte to access Snowflake." - examples: - - "AIRBYTE_ROLE" - type: "string" - title: "Role" - order: 1 - warehouse: - description: "The warehouse you created for Airbyte to access data into." - examples: - - "AIRBYTE_WAREHOUSE" - type: "string" - title: "Warehouse" - order: 2 - database: - description: "The database you created for Airbyte to access data into." - examples: - - "AIRBYTE_DATABASE" - type: "string" - title: "Database" - order: 3 - schema: - description: "The source Snowflake schema tables." - examples: - - "AIRBYTE_SCHEMA" - type: "string" - title: "Schema" - order: 4 - username: - description: "The username you created to allow Airbyte to access the database." - examples: - - "AIRBYTE_USER" - type: "string" - title: "Username" - order: 5 - password: - description: "Password associated with the username." - type: "string" - airbyte_secret: true - title: "Password" - order: 6 - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-square:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/square" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Square Source CDK Specifications" - type: "object" - required: - - "api_key" - - "is_sandbox" - additionalProperties: false - properties: - api_key: - type: "string" - description: "The API key for a Square application" - airbyte_secret: true - is_sandbox: - type: "boolean" - description: "Determines the sandbox (true) or production (false) API version" - examples: - - true - - false - default: true - start_date: - type: "string" - description: "The start date to sync data. Leave blank for full sync. Format:\ - \ YYYY-MM-DD." 
- examples: - - "2021-01-01" - default: "1970-01-01" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - include_deleted_objects: - type: "boolean" - description: "In some streams there is and option to include deleted objects\ - \ (Items, Categories, Discounts, Taxes)" - examples: - - true - - false - default: false - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-strava:0.1.0" - spec: - documentationUrl: "https://docsurl.com" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Strava Spec" - type: "object" - required: - - "client_id" - - "client_secret" - - "refresh_token" - - "athlete_id" - - "start_date" - additionalProperties: false - properties: - client_id: - type: "string" - description: "Strava Client ID" - pattern: "^[0-9_\\-]+$" - examples: - - "12345" - client_secret: - type: "string" - description: "Strava Client Secret" - pattern: "^[0-9a-fA-F]+$" - examples: - - "fc6243f283e51f6ca989aab298b17da125496f50" - airbyte_secret: true - refresh_token: - type: "string" - description: "Strava Refresh Token with activity:read_all permissions" - pattern: "^[0-9a-fA-F]+$" - examples: - - "fc6243f283e51f6ca989aab298b17da125496f50" - airbyte_secret: true - athlete_id: - type: "integer" - description: "Strava Athlete ID" - pattern: "^[0-9_\\-]+$" - examples: - - "17831421" - start_date: - type: "string" - description: "Start Query Timestamp in UTC" - examples: - - "2016-12-31 23:59:59" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-stripe:0.1.21" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Stripe Source Spec" - type: "object" - required: - - "client_secret" - - "account_id" - - "start_date" - additionalProperties: false - properties: - client_secret: - type: "string" - pattern: "^(s|r)k_(live|test)_[a-zA-Z0-9]+$" - description: "Stripe API key (usually starts with 'sk_live_'; find yours\ - \ here)." - airbyte_secret: true - account_id: - type: "string" - description: "Your Stripe account ID (starts with 'acct_', find yours here)." - start_date: - type: "string" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2017-01-25T00:00:00Z" - lookback_window_days: - type: "integer" - title: "Lookback Window (in days)" - default: 0 - minimum: 0 - description: "When set, the connector will always reload data from the past\ - \ N days, where N is the value set here. This is useful if your data is\ - \ updated after creation." 
- supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-surveymonkey:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/surveymonkey" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "SurveyMonkey Spec" - type: "object" - required: - - "start_date" - additionalProperties: true - properties: - start_date: - title: "Start Date" - type: "string" - description: "The date from which you'd like to replicate the data" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z?$" - examples: - - "2021-01-01T00:00:00Z" - credentials: - type: "object" - title: "Authentication Type" - oneOf: - - title: "Authenticate via OAuth" - type: "object" - required: - - "client_id" - - "client_secret" - - "access_token" - - "auth_type" - properties: - auth_type: - type: "string" - const: "OAuth" - enum: - - "OAuth" - default: "OAuth" - order: 0 - client_id: - title: "Client ID" - type: "string" - description: "The Client ID of your developer application" - airbyte_secret: true - client_secret: - title: "Client Secret" - type: "string" - description: "The client secret of your developer application" - airbyte_secret: true - access_token: - title: "Access Token" - type: "string" - description: "An access token generated using the above client ID\ - \ and secret" - airbyte_secret: true - - type: "object" - title: "Token Authentication" - additionalProperties: false - required: - - "access_token" - - "auth_type" - properties: - auth_type: - type: "string" - const: "Token" - enum: - - "Token" - default: "Token" - order: 0 - access_token: - type: "string" - airbyte_secret: true - description: "API Token. See the docs for information on how to generate this key." - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: - - "credentials" - - "0" - oauthFlowInitParameters: - - - "client_id" - - - "client_secret" - oauthFlowOutputParameters: - - - "access_token" -- dockerImage: "airbyte/source-tempo:0.2.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Tempo Spec" - type: "object" - required: - - "api_token" - additionalProperties: false - properties: - api_token: - type: "string" - description: "Tempo API Token." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-tiktok-marketing:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" - changelogUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" - connectionSpecification: - title: "TikTok Marketing Source Spec" - type: "object" - properties: - environment: - title: "Environment" - default: "Production" - oneOf: - - title: "Production" - type: "object" - properties: - environment: - title: "Environment" - const: "prod" - type: "string" - app_id: - title: "App Id" - description: "The App id applied by the developer." - type: "string" - secret: - title: "Secret" - description: "The private key of the developer's application." 
- airbyte_secret: true - type: "string" - required: - - "app_id" - - "secret" - - title: "Sandbox" - type: "object" - properties: - environment: - title: "Environment" - const: "sandbox" - type: "string" - advertiser_id: - title: "Advertiser Id" - description: "The Advertiser ID which generated for the developer's\ - \ Sandbox application." - type: "string" - required: - - "advertiser_id" - type: "object" - access_token: - title: "Access Token" - description: "Long-term Authorized Access Token." - airbyte_secret: true - type: "string" - start_date: - title: "Start Date" - description: "Start Date in format: YYYY-MM-DD." - default: "01-09-2016" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - type: "string" - required: - - "access_token" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "overwrite" - - "append" - - "append_dedup" -- dockerImage: "airbyte/source-trello:0.1.1" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/trello" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Trello Spec" - type: "object" - required: - - "token" - - "key" - - "start_date" - additionalProperties: true - properties: - token: - type: "string" - title: "API token" - description: "A Trello token. See the docs for instructions on how to generate it." - airbyte_secret: true - key: - type: "string" - title: "API key" - description: "A Trello token. See the docs for instructions on how to generate it." - airbyte_secret: true - start_date: - type: "string" - title: "Start date" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z$" - description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." - examples: - - "2021-03-01T00:00:00.000Z" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] - authSpecification: - auth_type: "oauth2.0" - oauth2Specification: - rootObject: [] - oauthFlowInitParameters: [] - oauthFlowOutputParameters: - - - "token" - - - "key" -- dockerImage: "airbyte/source-twilio:0.1.1" - spec: - documentationUrl: "https://hub.docker.com/r/airbyte/source-twilio" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Twilio Spec" - type: "object" - required: - - "account_sid" - - "auth_token" - - "start_date" - additionalProperties: false - properties: - account_sid: - title: "Account ID" - description: "Twilio account SID" - airbyte_secret: true - type: "string" - auth_token: - title: "Auth Token" - description: "Twilio Auth Token." - airbyte_secret: true - type: "string" - start_date: - title: "Replication Start Date" - description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ - \ data before this date will not be replicated." - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: - - "2020-10-01T00:00:00Z" - type: "string" - supportsIncremental: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: - - "append" -- dockerImage: "airbyte/source-typeform:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/typeform" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Typeform Spec" - type: "object" - required: - - "token" - - "start_date" - additionalProperties: true - properties: - start_date: - type: "string" - description: "The date you would like to replicate data. 
Format: YYYY-MM-DDTHH:mm:ss[Z]." - examples: - - "2020-01-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - token: - type: "string" - description: "The API Token for a Typeform account." - airbyte_secret: true - form_ids: - title: "Form IDs to replicate" - description: "When this parameter is set, the connector will replicate data\ - \ only from the input forms. Otherwise, all forms in your Typeform account\ - \ will be replicated. You can find form IDs in your form URLs. For example,\ - \ in the URL \"https://mysite.typeform.com/to/u6nXL7\" the form_id is\ - \ u6nXL7. You can find form URLs on Share panel" - type: "array" - items: - type: "string" - uniqueItems: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-us-census:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/us-census" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "https://api.census.gov/ Source Spec" - type: "object" - required: - - "api_key" - - "query_path" - additionalProperties: false - properties: - query_params: - type: "string" - description: "The query parameters portion of the GET request, without the\ - \ api key" - pattern: "^\\w+=[\\w,:*]+(&(?!key)\\w+=[\\w,:*]+)*$" - examples: - - "get=NAME,NAICS2017_LABEL,LFO_LABEL,EMPSZES_LABEL,ESTAB,PAYANN,PAYQTR1,EMP&for=us:*&NAICS2017=72&LFO=001&EMPSZES=001" - - "get=MOVEDIN,GEOID1,GEOID2,MOVEDOUT,FULL1_NAME,FULL2_NAME,MOVEDNET&for=county:*" - query_path: - type: "string" - description: "The path portion of the GET request" - pattern: "^data(\\/[\\w\\d]+)+$" - examples: - - "data/2019/cbp" - - "data/2018/acs" - - "data/timeseries/healthins/sahie" - api_key: - type: "string" - description: "Your API Key. Get your key here." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-zendesk-chat:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-chat" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Zendesk Chat Spec" - type: "object" - required: - - "start_date" - - "access_token" - additionalProperties: false - properties: - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Zendesk\ - \ Chat API, in the format YYYY-MM-DDT00:00:00Z." - examples: - - "2021-02-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - access_token: - type: "string" - description: "The value of the Access Token generated. See the docs for\ - \ more information" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-zendesk-sunshine:0.1.0" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk_sunshine" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Zendesk Sunshine Spec" - type: "object" - required: - - "api_token" - - "email" - - "start_date" - - "subdomain" - additionalProperties: false - properties: - api_token: - type: "string" - airbyte_secret: true - description: "API Token. See the docs for information on how to generate this key." 
- email: - type: "string" - description: "The user email for your Zendesk account" - subdomain: - type: "string" - description: "The subdomain for your Zendesk Account" - start_date: - title: "Start Date" - type: "string" - description: "The date from which you'd like to replicate the data" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - examples: "2021-01-01T00:00:00.000000Z" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-zendesk-support:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-support" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Zendesk Support Spec" - type: "object" - required: - - "start_date" - - "subdomain" - - "auth_method" - additionalProperties: false - properties: - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Zendesk\ - \ Support API, in the format YYYY-MM-DDT00:00:00Z. All data generated\ - \ after this date will be replicated." - examples: - - "2020-10-15T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - subdomain: - type: "string" - description: "The subdomain for your Zendesk Support" - auth_method: - title: "ZenDesk Authorization Method" - type: "object" - default: "api_token" - description: "Zendesk service provides 2 auth method: API token and oAuth2.\ - \ Now only the first one is available. Another one will be added in the\ - \ future" - oneOf: - - title: "API Token" - type: "object" - required: - - "email" - - "api_token" - additionalProperties: false - properties: - auth_method: - type: "string" - const: "api_token" - email: - type: "string" - description: "The user email for your Zendesk account" - api_token: - type: "string" - description: "The value of the API token generated. See the docs\ - \ for more information" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-zendesk-talk:0.1.2" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-talk" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Zendesk Talk Spec" - type: "object" - required: - - "start_date" - - "subdomain" - - "access_token" - - "email" - additionalProperties: false - properties: - start_date: - type: "string" - description: "The date from which you'd like to replicate data for Zendesk\ - \ Talk API, in the format YYYY-MM-DDT00:00:00Z." - examples: - - "2021-04-01T00:00:00Z" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - subdomain: - type: "string" - description: "The subdomain for your Zendesk Talk" - access_token: - type: "string" - description: "The value of the API token generated. See the docs for more information" - airbyte_secret: true - email: - type: "string" - description: "The user email for your Zendesk account" - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-zoom-singer:0.2.4" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/zoom" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Zoom Singer Spec" - type: "object" - required: - - "jwt" - additionalProperties: false - properties: - jwt: - title: "JWT Token" - type: "string" - description: "Zoom JWT Token. 
See the docs for more information on how to obtain this key." - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-zuora:0.1.3" - spec: - documentationUrl: "https://docs.airbyte.io/integrations/sources/zuora" - connectionSpecification: - $schema: "http://json-schema.org/draft-07/schema#" - title: "Zuora Connector Configuration" - type: "object" - required: - - "start_date" - - "tenant_endpoint" - - "data_query" - - "client_id" - - "client_secret" - properties: - start_date: - type: "string" - title: "Start Date" - description: "Start Date in format: YYYY-MM-DD" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" - window_in_days: - type: "string" - title: "Query Window (in days)" - description: "The amount of days for each data-chunk begining from start_date.\ - \ Bigger the value - faster the fetch. (0.1 - as for couple of hours,\ - \ 1 - as for a Day; 364 - as for a Year)." - examples: - - "0.5" - - "1" - - "30" - - "60" - - "90" - - "120" - - "200" - - "364" - pattern: "^(0|[1-9]\\d*)(\\.\\d+)?$" - default: "90" - tenant_endpoint: - title: "Tenant Endpoint Location" - type: "string" - description: "Please choose the right endpoint where your Tenant is located.\ - \ More info by this Link" - enum: - - "US Production" - - "US Cloud Production" - - "US API Sandbox" - - "US Cloud API Sandbox" - - "US Central Sandbox" - - "US Performance Test" - - "EU Production" - - "EU API Sandbox" - - "EU Central Sandbox" - data_query: - title: "Data Query Type" - type: "string" - description: "Choose between `Live`, or `Unlimited` - the optimized, replicated\ - \ database at 12 hours freshness for high volume extraction Link" - enum: - - "Live" - - "Unlimited" - default: "Live" - client_id: - type: "string" - title: "Client ID" - description: "Your OAuth user Client ID" - airbyte_secret: true - client_secret: - type: "string" - title: "Client Secret" - description: "Your OAuth user Client Secret" - airbyte_secret: true - supportsNormalization: false - supportsDBT: false - supported_destination_sync_modes: [] diff --git a/airbyte-config/models/build.gradle b/airbyte-config/models/build.gradle index d62c88c7e1633..271b3fe685e7e 100644 --- a/airbyte-config/models/build.gradle +++ b/airbyte-config/models/build.gradle @@ -7,11 +7,10 @@ plugins { dependencies { implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') - implementation project(':airbyte-commons') } jsonSchema2Pojo { - sourceType = SourceType.YAMLSCHEMA + sourceType = SourceType.YAMLSCHEMA source = files("${sourceSets.main.output.resourcesDir}/types") targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') diff --git a/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml deleted file mode 100644 index 0d3becf8e74c6..0000000000000 --- a/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -"$schema": http://json-schema.org/draft-07/schema# -"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml -title: DockerImageSpec -description: docker image name and the connector specification associated with it -type: object -required: - - dockerImage - - spec -additionalProperties: false -properties: - dockerImage: - type: string - spec: - type: object - existingJavaType: 
io.airbyte.protocol.models.ConnectorSpecification diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java index 2181bcb162640..19035ed42295d 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java @@ -366,14 +366,7 @@ ConnectorCounter updateConnectorDefinitions(final DSLContext ctx, final ConnectorInfo connectorInfo = connectorRepositoryToIdVersionMap.get(repository); final JsonNode currentDefinition = connectorInfo.definition; - - // todo (lmossman) - this logic to remove the "spec" field is temporary; it is necessary to avoid - // breaking users who are actively using an old connector version, otherwise specs from the most - // recent connector versions may be inserted into the db which could be incompatible with the - // version they are actually using. - // Once the faux major version bump has been merged, this "new field" logic will be removed - // entirely. - final Set newFields = Sets.difference(getNewFields(currentDefinition, latestDefinition), Set.of("spec")); + final Set newFields = getNewFields(currentDefinition, latestDefinition); // Process connector in use if (connectorRepositoriesInUse.contains(repository)) { diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java index 2902d15ef1e97..3bca71d57c2db 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java @@ -5,10 +5,8 @@ package io.airbyte.config.persistence; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import com.google.common.io.Resources; -import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.MoreIterators; import io.airbyte.commons.yaml.Yamls; @@ -21,7 +19,6 @@ import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -48,40 +45,11 @@ public static YamlSeedConfigPersistence get(final Class seedDefinitionsResour return new YamlSeedConfigPersistence(seedDefinitionsResourceClass); } - private YamlSeedConfigPersistence(final Class seedResourceClass) throws IOException { - final Map sourceDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_SOURCE_DEFINITION); - final Map sourceSpecConfigs = getConfigs(seedResourceClass, SeedType.SOURCE_SPEC); - final Map fullSourceDefinitionConfigs = sourceDefinitionConfigs.entrySet().stream() - .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), sourceSpecConfigs))); - - final Map destinationDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION); - final Map destinationSpecConfigs = getConfigs(seedResourceClass, SeedType.DESTINATION_SPEC); - final Map fullDestinationDefinitionConfigs = destinationDefinitionConfigs.entrySet().stream() - 
.collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), destinationSpecConfigs))); - + private YamlSeedConfigPersistence(final Class seedDefinitionsResourceClass) throws IOException { this.allSeedConfigs = ImmutableMap.>builder() - .put(SeedType.STANDARD_SOURCE_DEFINITION, fullSourceDefinitionConfigs) - .put(SeedType.STANDARD_DESTINATION_DEFINITION, fullDestinationDefinitionConfigs).build(); - } - - /** - * Merges the corresponding spec JSON into the definition JSON. This is necessary because specs are - * stored in a separate resource file from definitions. - * - * @param definitionJson JSON of connector definition that is missing a spec - * @param specConfigs map of docker image to JSON of docker image/connector spec pair - * @return JSON of connector definition including the connector spec - */ - private JsonNode mergeSpecIntoDefinition(final JsonNode definitionJson, final Map specConfigs) { - final String dockerImage = DockerUtils.getTaggedImageName( - definitionJson.get("dockerRepository").asText(), - definitionJson.get("dockerImageTag").asText()); - final JsonNode specConfigJson = specConfigs.get(dockerImage); - if (specConfigJson == null || specConfigJson.get("spec") == null) { - throw new UnsupportedOperationException(String.format("There is no seed spec for docker image %s", dockerImage)); - } - ((ObjectNode) definitionJson).set("spec", specConfigJson.get("spec")); - return definitionJson; + .put(SeedType.STANDARD_SOURCE_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_SOURCE_DEFINITION)) + .put(SeedType.STANDARD_DESTINATION_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION)) + .build(); } @SuppressWarnings("UnstableApiUsage") diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java index a6f261628046c..8a740ba535688 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java @@ -15,7 +15,6 @@ import io.airbyte.config.StandardSync; import io.airbyte.config.StandardWorkspace; import java.io.IOException; -import java.net.URI; import java.util.Collections; import java.util.Map; import java.util.stream.Stream; @@ -42,7 +41,6 @@ public void testGetConfig() throws Exception { assertEquals("airbyte/source-mysql", mysqlSource.getDockerRepository()); assertEquals("https://docs.airbyte.io/integrations/sources/mysql", mysqlSource.getDocumentationUrl()); assertEquals("mysql.svg", mysqlSource.getIcon()); - assertEquals(URI.create("https://docs.airbyte.io/integrations/sources/mysql"), mysqlSource.getSpec().getDocumentationUrl()); // destination final String s3DestinationId = "4816b78f-1489-44c1-9060-4b19d5fa9362"; @@ -52,16 +50,13 @@ public void testGetConfig() throws Exception { assertEquals("S3", s3Destination.getName()); assertEquals("airbyte/destination-s3", s3Destination.getDockerRepository()); assertEquals("https://docs.airbyte.io/integrations/destinations/s3", s3Destination.getDocumentationUrl()); - assertEquals(URI.create("https://docs.airbyte.io/integrations/destinations/s3"), s3Destination.getSpec().getDocumentationUrl()); } @Test public void testGetInvalidConfig() { - assertThrows( - UnsupportedOperationException.class, + 
assertThrows(UnsupportedOperationException.class, () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SYNC, "invalid_id", StandardSync.class)); - assertThrows( - ConfigNotFoundException.class, + assertThrows(ConfigNotFoundException.class, () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "invalid_id", StandardWorkspace.class)); } diff --git a/airbyte-config/specs/README.md b/airbyte-config/specs/README.md deleted file mode 100644 index 8d043e1ec9729..0000000000000 --- a/airbyte-config/specs/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Generating Seed Connector Specs - -The catalog of seeded connector definitions is stored and manually updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` -files. These manually-maintained connector definitions intentionally _do not_ contain the connector specs, in an effort to keep these files -human-readable and easily-editable, and because specs can be automatically fetched. - -This automatic fetching of connector specs is the goal of the SeedConnectorSpecGenerator. This class reads the connector definitions in -the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, fetches the corresponding specs from the GCS bucket cache, and writes the -specs to the `airbyte-config/init/src/main/resources/seed/*_specs.yaml` files. See the -[SeedConnectorSpecGenerator](src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java) class for more details. - -Therefore, whenever a connector definition is updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, the -SeedConnectorSpecGenerator should be re-ran to generate the updated connector specs files. To do so, -run `./gradlew :airbyte-config:init:processResources`, or just build the platform project, and commit the changes to your PR. If you do not do this, -the build in the CI will fail because there will be a diff in the generated files as you have not checked in the changes that were applied by the -generator. diff --git a/airbyte-config/specs/build.gradle b/airbyte-config/specs/build.gradle deleted file mode 100644 index 91d1fd0921706..0000000000000 --- a/airbyte-config/specs/build.gradle +++ /dev/null @@ -1,24 +0,0 @@ -plugins { - id 'java' -} - -dependencies { - implementation 'commons-cli:commons-cli:1.4' - - implementation project(':airbyte-commons') - implementation project(':airbyte-commons-cli') - implementation project(':airbyte-config:models') - implementation project(':airbyte-protocol:models') - implementation project(':airbyte-json-validation') -} - -task generateSeedConnectorSpecs(type: JavaExec, dependsOn: compileJava) { - classpath = sourceSets.main.runtimeClasspath - - mainClass = 'io.airbyte.config.specs.SeedConnectorSpecGenerator' - - args '--seed-root' - args new File(project(":airbyte-config:init").projectDir, '/src/main/resources/seed') -} - -project(":airbyte-config:init").tasks.processResources.dependsOn(generateSeedConnectorSpecs) diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java deleted file mode 100644 index 832326c551c46..0000000000000 --- a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.config.specs; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.api.client.util.Preconditions; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Storage; -import io.airbyte.commons.json.Jsons; -import io.airbyte.protocol.models.AirbyteProtocolSchema; -import io.airbyte.protocol.models.ConnectorSpecification; -import io.airbyte.validation.json.JsonSchemaValidator; -import io.airbyte.validation.json.JsonValidationException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.util.Optional; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class GcsBucketSpecFetcher { - - private static final Logger LOGGER = LoggerFactory.getLogger(GcsBucketSpecFetcher.class); - - private final Storage storage; - private final String bucketName; - - public GcsBucketSpecFetcher(final Storage storage, final String bucketName) { - this.storage = storage; - this.bucketName = bucketName; - } - - public String getBucketName() { - return bucketName; - } - - public Optional attemptFetch(final String dockerImage) { - final String[] dockerImageComponents = dockerImage.split(":"); - Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalidate docker image: " + dockerImage); - final String dockerImageName = dockerImageComponents[0]; - final String dockerImageTag = dockerImageComponents[1]; - - final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); - LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); - final Blob specAsBlob = storage.get(bucketName, specPath.toString()); - - // if null it means the object was not found. - if (specAsBlob == null) { - LOGGER.debug("Spec not found in bucket storage"); - return Optional.empty(); - } - - final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); - try { - validateConfig(Jsons.deserialize(specAsString)); - } catch (final JsonValidationException e) { - LOGGER.error("Received invalid spec from bucket store. {}", e.toString()); - return Optional.empty(); - } - return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); - } - - private static void validateConfig(final JsonNode json) throws JsonValidationException { - final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); - final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); - jsonSchemaValidator.ensure(specJsonSchema, json); - } - -} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java deleted file mode 100644 index 05a273b08d84a..0000000000000 --- a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.config.specs; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.cloud.storage.StorageOptions; -import com.google.common.annotations.VisibleForTesting; -import io.airbyte.commons.cli.Clis; -import io.airbyte.commons.io.IOs; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.util.MoreIterators; -import io.airbyte.commons.yaml.Yamls; -import io.airbyte.config.DockerImageSpec; -import io.airbyte.config.EnvConfigs; -import io.airbyte.protocol.models.ConnectorSpecification; -import java.io.IOException; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This script is responsible for ensuring that up-to-date {@link ConnectorSpecification}s for every - * connector definition in the seed are stored in a corresponding resource file, for the purpose of - * seeding the specs into the config database on server startup. See - * ./airbyte-config/specs/readme.md for more details on how this class is run and how it fits into - * the project. - *
- * Specs are stored in a separate file from the definitions in an effort to keep the definitions - * yaml files human-readable and easily-editable, as specs can be rather large. - *
- * Specs are fetched from the GCS spec cache bucket, so if any specs are missing from the bucket - * then this will fail. Note that this script only pulls specs from the bucket cache; it never - * pushes specs to the bucket. Since this script runs at build time, the decision was to depend on - * the bucket cache rather than running a docker container to fetch the spec during the build which - * could be slow and unwieldy. If there is a failure, check the bucket cache and figure out how to - * get the correct spec in there. - */ -public class SeedConnectorSpecGenerator { - - private static final String DOCKER_REPOSITORY_FIELD = "dockerRepository"; - private static final String DOCKER_IMAGE_TAG_FIELD = "dockerImageTag"; - private static final String DOCKER_IMAGE_FIELD = "dockerImage"; - private static final String SPEC_FIELD = "spec"; - private static final String SPEC_BUCKET_NAME = new EnvConfigs().getSpecCacheBucket(); - - private static final Logger LOGGER = LoggerFactory.getLogger(SeedConnectorSpecGenerator.class); - - private static final Option SEED_ROOT_OPTION = Option.builder("s").longOpt("seed-root").hasArg(true).required(true) - .desc("path to where seed resource files are stored").build(); - private static final Options OPTIONS = new Options().addOption(SEED_ROOT_OPTION); - - private final GcsBucketSpecFetcher bucketSpecFetcher; - - public SeedConnectorSpecGenerator(final GcsBucketSpecFetcher bucketSpecFetcher) { - this.bucketSpecFetcher = bucketSpecFetcher; - } - - public static void main(final String[] args) throws Exception { - final CommandLine parsed = Clis.parse(args, OPTIONS); - final Path outputRoot = Path.of(parsed.getOptionValue(SEED_ROOT_OPTION.getOpt())); - - final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), SPEC_BUCKET_NAME); - final SeedConnectorSpecGenerator seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcher); - seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.SOURCE); - seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.DESTINATION); - } - - public void run(final Path seedRoot, final SeedConnectorType seedConnectorType) throws IOException { - LOGGER.info("Updating seeded {} definition specs if necessary...", seedConnectorType.name()); - - final JsonNode seedDefinitionsJson = yamlToJson(seedRoot, seedConnectorType.getDefinitionFileName()); - final JsonNode seedSpecsJson = yamlToJson(seedRoot, seedConnectorType.getSpecFileName()); - - final List updatedSeedSpecs = fetchUpdatedSeedSpecs(seedDefinitionsJson, seedSpecsJson); - - final String outputString = String.format("# This file is generated by %s.\n", this.getClass().getName()) - + "# Do NOT edit this file directly. 
See generator class for more details.\n" - + Yamls.serialize(updatedSeedSpecs); - final Path outputPath = IOs.writeFile(seedRoot.resolve(seedConnectorType.getSpecFileName()), outputString); - - LOGGER.info("Finished updating {}", outputPath); - } - - private JsonNode yamlToJson(final Path root, final String fileName) { - final String yamlString = IOs.readFile(root, fileName); - return Yamls.deserialize(yamlString); - } - - @VisibleForTesting - final List fetchUpdatedSeedSpecs(final JsonNode seedDefinitions, final JsonNode currentSeedSpecs) { - final List seedDefinitionsDockerImages = MoreIterators.toList(seedDefinitions.elements()) - .stream() - .map(json -> String.format("%s:%s", json.get(DOCKER_REPOSITORY_FIELD).asText(), json.get(DOCKER_IMAGE_TAG_FIELD).asText())) - .collect(Collectors.toList()); - - final Map currentSeedImageToSpec = MoreIterators.toList(currentSeedSpecs.elements()) - .stream() - .collect(Collectors.toMap( - json -> json.get(DOCKER_IMAGE_FIELD).asText(), - json -> new DockerImageSpec().withDockerImage(json.get(DOCKER_IMAGE_FIELD).asText()) - .withSpec(Jsons.object(json.get(SPEC_FIELD), ConnectorSpecification.class)))); - - return seedDefinitionsDockerImages - .stream() - .map(dockerImage -> currentSeedImageToSpec.containsKey(dockerImage) ? currentSeedImageToSpec.get(dockerImage) : fetchSpecFromGCS(dockerImage)) - .collect(Collectors.toList()); - } - - private DockerImageSpec fetchSpecFromGCS(final String dockerImage) { - LOGGER.info("Seeded spec not found for docker image {} - fetching from GCS bucket {}...", dockerImage, bucketSpecFetcher.getBucketName()); - final ConnectorSpecification spec = bucketSpecFetcher.attemptFetch(dockerImage) - .orElseThrow(() -> new RuntimeException(String.format( - "Failed to fetch valid spec file for docker image %s from GCS bucket %s", - dockerImage, - bucketSpecFetcher.getBucketName()))); - return new DockerImageSpec().withDockerImage(dockerImage).withSpec(spec); - } - -} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java deleted file mode 100644 index 36d1326af215b..0000000000000 --- a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.config.specs; - -public enum SeedConnectorType { - - SOURCE( - "source_definitions.yaml", - "source_specs.yaml"), - DESTINATION( - "destination_definitions.yaml", - "destination_specs.yaml"); - - private final String definitionFileName; - private final String specFileName; - - SeedConnectorType(final String definitionFileName, - final String specFileName) { - this.definitionFileName = definitionFileName; - this.specFileName = specFileName; - } - - public String getDefinitionFileName() { - return definitionFileName; - } - - public String getSpecFileName() { - return specFileName; - } - -} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java deleted file mode 100644 index 25e16bea545bf..0000000000000 --- a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.config.specs; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Storage; -import com.google.common.collect.ImmutableMap; -import io.airbyte.commons.json.Jsons; -import io.airbyte.protocol.models.ConnectorSpecification; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.util.Optional; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class GcsBucketSpecFetcherTest { - - private static final String BUCKET_NAME = "bucket"; - private static final String DOCKER_REPOSITORY = "image"; - private static final String DOCKER_IMAGE_TAG = "0.1.0"; - private static final String DOCKER_IMAGE = DOCKER_REPOSITORY + ":" + DOCKER_IMAGE_TAG; - private static final String SPEC_PATH = Path.of("specs").resolve(DOCKER_REPOSITORY).resolve(DOCKER_IMAGE_TAG).resolve("spec.json").toString(); - - private Storage storage; - private Blob specBlob; - private final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); - - @SuppressWarnings("unchecked") - @BeforeEach - void setup() throws IOException { - storage = mock(Storage.class); - - final byte[] specBytes = Jsons.toBytes(Jsons.jsonNode(spec)); - specBlob = mock(Blob.class); - when(specBlob.getContent()).thenReturn(specBytes); - } - - @Test - void testGetsSpecIfPresent() throws IOException { - when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(specBlob); - - final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); - final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); - - assertTrue(returnedSpec.isPresent()); - assertEquals(spec, returnedSpec.get()); - } - - @Test - void testReturnsEmptyIfNotPresent() throws IOException { - when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(null); - - final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); - final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); - - assertTrue(returnedSpec.isEmpty()); - } - - @Test - void testReturnsEmptyIfInvalidSpec() throws IOException { - final Blob invalidSpecBlob = mock(Blob.class); - when(invalidSpecBlob.getContent()).thenReturn("{\"notASpec\": true}".getBytes(StandardCharsets.UTF_8)); - when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(invalidSpecBlob); - - final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); - final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); - - assertTrue(returnedSpec.isEmpty()); - } - -} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java deleted file mode 100644 index 0925608a2f62f..0000000000000 --- a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.config.specs; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.commons.json.Jsons; -import io.airbyte.config.DockerImageSpec; -import io.airbyte.config.StandardDestinationDefinition; -import io.airbyte.protocol.models.ConnectorSpecification; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.UUID; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class SeedConnectorSpecGeneratorTest { - - private static final UUID DEF_ID1 = java.util.UUID.randomUUID(); - private static final UUID DEF_ID2 = java.util.UUID.randomUUID(); - private static final String CONNECTOR_NAME1 = "connector1"; - private static final String CONNECTOR_NAME2 = "connector2"; - private static final String DOCUMENTATION_URL = "https://wwww.example.com"; - private static final String DOCKER_REPOSITORY1 = "airbyte/connector1"; - private static final String DOCKER_REPOSITORY2 = "airbyte/connector2"; - private static final String DOCKER_TAG1 = "0.1.0"; - private static final String DOCKER_TAG2 = "0.2.0"; - private static final String BUCKET_NAME = "bucket"; - - private SeedConnectorSpecGenerator seedConnectorSpecGenerator; - private GcsBucketSpecFetcher bucketSpecFetcherMock; - - @BeforeEach - void setup() { - bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); - when(bucketSpecFetcherMock.getBucketName()).thenReturn(BUCKET_NAME); - - seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcherMock); - } - - @Test - void testMissingSpecIsFetched() { - final StandardDestinationDefinition sourceDefinition1 = new StandardDestinationDefinition() - .withDestinationDefinitionId(DEF_ID1) - .withDockerRepository(DOCKER_REPOSITORY1) - .withDockerImageTag(DOCKER_TAG1) - .withName(CONNECTOR_NAME1) - .withDocumentationUrl(DOCUMENTATION_URL); - final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); - final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); - - final StandardDestinationDefinition sourceDefinition2 = new StandardDestinationDefinition() - .withDestinationDefinitionId(DEF_ID2) - .withDockerRepository(DOCKER_REPOSITORY2) - .withDockerImageTag(DOCKER_TAG2) - .withName(CONNECTOR_NAME2) - .withDocumentationUrl(DOCUMENTATION_URL); - final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); - final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); - - final JsonNode seedDefinitions = Jsons.jsonNode(Arrays.asList(sourceDefinition1, sourceDefinition2)); - final JsonNode seedSpecs = Jsons.jsonNode(List.of(dockerImageSpec1)); - - when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(spec2)); - - final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); - final List expectedSeedSpecs = Arrays.asList(dockerImageSpec1, dockerImageSpec2); - - 
assertEquals(expectedSeedSpecs, actualSeedSpecs); - } - - @Test - void testOutdatedSpecIsFetched() { - final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() - .withDestinationDefinitionId(DEF_ID1) - .withDockerRepository(DOCKER_REPOSITORY1) - .withDockerImageTag(DOCKER_TAG2) - .withName(CONNECTOR_NAME1) - .withDocumentationUrl(DOCUMENTATION_URL); - final ConnectorSpecification outdatedSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of( - "foo1", - "bar1"))); - final DockerImageSpec outdatedDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1) - .withSpec(outdatedSpec); - - final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); - final JsonNode seedSpecs = Jsons.jsonNode(List.of(outdatedDockerImageSpec)); - - final ConnectorSpecification newSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); - final DockerImageSpec newDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2).withSpec(newSpec); - - when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(newSpec)); - - final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); - final List expectedSeedSpecs = List.of(newDockerImageSpec); - - assertEquals(expectedSeedSpecs, actualSeedSpecs); - } - - @Test - void testExtraneousSpecIsRemoved() { - final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() - .withDestinationDefinitionId(DEF_ID1) - .withDockerRepository(DOCKER_REPOSITORY1) - .withDockerImageTag(DOCKER_TAG1) - .withName(CONNECTOR_NAME1) - .withDocumentationUrl(DOCUMENTATION_URL); - final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); - final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); - - final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); - final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); - - final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); - final JsonNode seedSpecs = Jsons.jsonNode(Arrays.asList(dockerImageSpec1, dockerImageSpec2)); - - final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); - final List expectedSeedSpecs = List.of(dockerImageSpec1); - - assertEquals(expectedSeedSpecs, actualSeedSpecs); - } - - @Test - void testNoFetchIsPerformedIfAllSpecsUpToDate() { - final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() - .withDestinationDefinitionId(DEF_ID1) - .withDockerRepository(DOCKER_REPOSITORY1) - .withDockerImageTag(DOCKER_TAG1) - .withName(CONNECTOR_NAME1) - .withDocumentationUrl(DOCUMENTATION_URL); - final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); - final DockerImageSpec dockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec); - - final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); - final JsonNode seedSpecs = 
Jsons.jsonNode(List.of(dockerImageSpec)); - - final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); - final List expectedSeedSpecs = List.of(dockerImageSpec); - - assertEquals(expectedSeedSpecs, actualSeedSpecs); - verify(bucketSpecFetcherMock, never()).attemptFetch(any()); - } - -} diff --git a/airbyte-json-validation/build.gradle b/airbyte-json-validation/build.gradle index 2114a4a053e8b..4881fc079a464 100644 --- a/airbyte-json-validation/build.gradle +++ b/airbyte-json-validation/build.gradle @@ -6,6 +6,4 @@ dependencies { implementation 'com.networknt:json-schema-validator:1.0.42' // needed so that we can follow $ref when parsing json. jackson does not support this natively. implementation 'me.andrz.jackson:jackson-json-reference-core:0.3.2' - - implementation project(':airbyte-commons') } diff --git a/airbyte-protocol/models/build.gradle b/airbyte-protocol/models/build.gradle index 85f8d48cac5ab..e4199332b848d 100644 --- a/airbyte-protocol/models/build.gradle +++ b/airbyte-protocol/models/build.gradle @@ -7,8 +7,6 @@ plugins { dependencies { implementation 'javax.validation:validation-api:1.1.0.Final' implementation 'org.apache.commons:commons-lang3:3.11' - - implementation project(':airbyte-commons') } jsonSchema2Pojo { diff --git a/airbyte-scheduler/client/build.gradle b/airbyte-scheduler/client/build.gradle index 5e319c0418efa..d90a0262c97c7 100644 --- a/airbyte-scheduler/client/build.gradle +++ b/airbyte-scheduler/client/build.gradle @@ -5,7 +5,6 @@ plugins { dependencies { implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') - implementation project(':airbyte-config:specs') implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') implementation project(':airbyte-scheduler:models') diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java index bcdc972c2cb4a..a615643d0830e 100644 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java +++ b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java @@ -4,17 +4,27 @@ package io.airbyte.scheduler.client; +import com.fasterxml.jackson.databind.JsonNode; +import com.google.api.client.util.Preconditions; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.json.Jsons; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; -import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.protocol.models.AirbyteProtocolSchema; import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.util.Optional; +import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,15 +33,17 @@ public class BucketSpecCacheSchedulerClient implements 
SynchronousSchedulerClien private static final Logger LOGGER = LoggerFactory.getLogger(BucketSpecCacheSchedulerClient.class); private final SynchronousSchedulerClient client; - private final GcsBucketSpecFetcher bucketSpecFetcher; + private final Function> bucketSpecFetcher; public BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final String bucketName) { - this.client = client; - this.bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), bucketName); + this( + client, + dockerImage -> attemptToFetchSpecFromBucket(StorageOptions.getDefaultInstance().getService(), bucketName, dockerImage)); } @VisibleForTesting - BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final GcsBucketSpecFetcher bucketSpecFetcher) { + BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, + final Function> bucketSpecFetcher) { this.client = client; this.bucketSpecFetcher = bucketSpecFetcher; } @@ -60,7 +72,7 @@ public SynchronousResponse createGetSpecJob(final String Optional cachedSpecOptional; // never want to fail because we could not fetch from off board storage. try { - cachedSpecOptional = bucketSpecFetcher.attemptFetch(dockerImage); + cachedSpecOptional = bucketSpecFetcher.apply(dockerImage); LOGGER.debug("Spec bucket cache: Call to cache did not fail."); } catch (final RuntimeException e) { cachedSpecOptional = Optional.empty(); @@ -76,4 +88,38 @@ public SynchronousResponse createGetSpecJob(final String } } + private static void validateConfig(final JsonNode json) throws JsonValidationException { + final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); + final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); + jsonSchemaValidator.ensure(specJsonSchema, json); + } + + public static Optional attemptToFetchSpecFromBucket(final Storage storage, + final String bucketName, + final String dockerImage) { + final String[] dockerImageComponents = dockerImage.split(":"); + Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalidate docker image: " + dockerImage); + final String dockerImageName = dockerImageComponents[0]; + final String dockerImageTag = dockerImageComponents[1]; + + final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); + LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); + final Blob specAsBlob = storage.get(bucketName, specPath.toString()); + + // if null it means the object was not found. + if (specAsBlob == null) { + LOGGER.debug("Spec not found in bucket storage"); + return Optional.empty(); + } + + final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); + try { + validateConfig(Jsons.deserialize(specAsString)); + } catch (final JsonValidationException e) { + LOGGER.error("Received invalid spec from bucket store. 
{}", e.toString()); + return Optional.empty(); + } + return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); + } + } diff --git a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java index 01f4595b94685..cf21fd2b160df 100644 --- a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java +++ b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java @@ -10,10 +10,10 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; -import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.ConnectorSpecification; import java.io.IOException; import java.util.Optional; +import java.util.function.Function; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -21,18 +21,18 @@ class BucketSpecCacheSchedulerClientTest { private SynchronousSchedulerClient defaultClientMock; - private GcsBucketSpecFetcher bucketSpecFetcherMock; + private Function> bucketSpecFetcherMock; @SuppressWarnings("unchecked") @BeforeEach void setup() { defaultClientMock = mock(SynchronousSchedulerClient.class); - bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); + bucketSpecFetcherMock = mock(Function.class); } @Test void testGetsSpecIfPresent() throws IOException { - when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); assertEquals(new ConnectorSpecification(), client.createGetSpecJob("source-pokeapi:0.1.0").getOutput()); verifyNoInteractions(defaultClientMock); @@ -40,7 +40,7 @@ void testGetsSpecIfPresent() throws IOException { @Test void testCallsDelegateIfNotPresent() throws IOException { - when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); + when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -49,7 +49,7 @@ void testCallsDelegateIfNotPresent() throws IOException { @Test void testCallsDelegateIfException() throws IOException { - when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); + when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -62,7 +62,7 @@ void testCallsDelegateIfException() throws IOException { @Disabled @Test void testGetsSpecFromBucket() throws IOException { - 
when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); // todo (cgardens) - replace with prod bucket. final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, "cg-specs"); final ConnectorSpecification actualSpec = client.createGetSpecJob("source-pokeapi:0.1.0").getOutput(); diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle index a38db0edc5dbe..edc7c55fb2506 100644 --- a/airbyte-server/build.gradle +++ b/airbyte-server/build.gradle @@ -66,7 +66,6 @@ dependencies { implementation project(':airbyte-config:init') implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') - implementation project(':airbyte-config:specs') implementation project(':airbyte-db:lib') implementation project(":airbyte-json-validation") implementation project(':airbyte-migration') diff --git a/build.gradle b/build.gradle index 39d9957cd26b1..271b27c23feb4 100644 --- a/build.gradle +++ b/build.gradle @@ -73,8 +73,7 @@ def createSpotlessTarget = { pattern -> 'normalization_test_output', 'tools', 'secrets', - 'charts', // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. - 'resources/seed/*_specs.yaml' + 'charts' // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. ] if (System.getenv().containsKey("SUB_BUILD")) { diff --git a/settings.gradle b/settings.gradle index 44baedba5f042..4d5b7d79afa0a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -42,7 +42,6 @@ include ':airbyte-workers' // reused by acceptance tests in connector base. include ':airbyte-analytics' // transitively used by airbyte-workers. include ':airbyte-config:init' // transitively used by airbyte-workers. include ':airbyte-config:persistence' // transitively used by airbyte-workers. -include ':airbyte-config:specs' // transitively used by airbyte-workers. include ':airbyte-db:jooq' // transitively used by airbyte-workers. include ':airbyte-notification' // transitively used by airbyte-workers. include ':airbyte-scheduler:models' // transitively used by airbyte-workers. From be80b8853310bddfc241824ddcc5643c06fcfc58 Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Wed, 3 Nov 2021 09:07:52 -0700 Subject: [PATCH 27/83] Documentation around temporal versioning (#7569) Add documentation about versioning in temporal workflow. --- airbyte-workers/README.md | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 airbyte-workers/README.md diff --git a/airbyte-workers/README.md b/airbyte-workers/README.md new file mode 100644 index 0000000000000..2195b1864819e --- /dev/null +++ b/airbyte-workers/README.md @@ -0,0 +1,45 @@ +# Temporal Development + +## Versioning + +Temporal is maintaining an internal history of the activity it runs. This history is based on a specific order. If we restart a temporal workflow with +a new implementation that has a different order, the workflow will be stuck and will need manual action to be properly restarted. Temporal provides +an API to be able to manage those changes smoothly. However, temporal is very permissive with version rules. Airbyte will follow +the following rules: + +- There will be one global version per workflow, meaning that we will use a single tag per workflow. 
+- All the following code modifications will need to bump the version number; it won't be limited to a release of a new Airbyte version + - Addition of an activity + - Deletion of an activity + - Change of the input of an activity + - Addition of a temporal sleep timer + +The way to use this version should be the following: + +If no prior version usage is present: + +``` +final int version = Workflow.getVersion(VERSION_LABEL, MINIMAL_VERSION, CURRENT_VERSION); + +if (version >= CURRENT_VERSION) { + // New implementation +} +``` + +If some prior version usage is present (we bump the version from 4 to 5 in this example): + +``` +final int version = Workflow.getVersion(VERSION_LABEL, MINIMAL_VERSION, CURRENT_VERSION); + +if (version <= 4 && version >= MINIMAL_VERSION) { + // old implementation +} else if (version >= CURRENT_VERSION) { + // New implementation +} +``` + +## Removing a version + +Removing a version is a potential breaking change and should be done very carefully. We should maintain a MINIMAL_VERSION to keep track of the +current minimal version. Both MINIMAL_VERSION and CURRENT_VERSION need to be present in the workflow file even if they are unused (if they have been +used once). From 00356f99108af3c0c534a3aa972331ddc0e32084 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Wed, 3 Nov 2021 09:17:54 -0700 Subject: [PATCH 28/83] add temporal and db logs for kube test output (#7583) --- tools/bin/acceptance_test_kube.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/bin/acceptance_test_kube.sh b/tools/bin/acceptance_test_kube.sh index 7dd9d2502f498..990a019d9c6d4 100755 --- a/tools/bin/acceptance_test_kube.sh +++ b/tools/bin/acceptance_test_kube.sh @@ -49,9 +49,20 @@ server_logs () { echo "server logs:" && kubectl logs deployment.apps/airbyte-ser scheduler_logs () { echo "scheduler logs:" && kubectl logs deployment.apps/airbyte-scheduler; } pod_sweeper_logs () { echo "pod sweeper logs:" && kubectl logs deployment.apps/airbyte-pod-sweeper; } worker_logs () { echo "worker logs:" && kubectl logs deployment.apps/airbyte-worker; } +db_logs () { echo "db logs:" && kubectl logs deployment.apps/airbyte-db; } +temporal_logs () { echo "temporal logs:" && kubectl logs deployment.apps/airbyte-temporal; } describe_pods () { echo "describe pods:" && kubectl describe pods; } describe_nodes () { echo "describe nodes:" && kubectl describe nodes; } -print_all_logs () { server_logs; scheduler_logs; worker_logs; pod_sweeper_logs; describe_nodes; describe_pods; } +print_all_logs () { + server_logs; + scheduler_logs; + worker_logs; + db_logs; + temporal_logs; + pod_sweeper_logs; + describe_nodes; + describe_pods; +} trap "echo 'kube logs:' && print_all_logs" EXIT kubectl port-forward svc/airbyte-server-svc 8001:8001 & From 8981228fc461f079be7fa68748eccefbeb30d5b0 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Wed, 3 Nov 2021 09:18:08 -0700 Subject: [PATCH 29/83] stop trying to copy normalization image for kube tests (#7580) --- tools/bin/acceptance_test_kube.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/bin/acceptance_test_kube.sh b/tools/bin/acceptance_test_kube.sh index 990a019d9c6d4..7eb6f9a9a4ed9 100755 --- a/tools/bin/acceptance_test_kube.sh +++ b/tools/bin/acceptance_test_kube.sh @@ -14,7 +14,6 @@ kind load docker-image airbyte/scheduler:dev --name chart-testing & kind load docker-image airbyte/webapp:dev --name chart-testing & kind load docker-image airbyte/worker:dev --name chart-testing & kind load docker-image airbyte/db:dev --name 
chart-testing & -kind load docker-image airbyte/normalization:dev --name chart-testing & wait echo "Starting app..." From c77dd7ad66a079e7c0cbd23c9647b764c748c0e2 Mon Sep 17 00:00:00 2001 From: Jenny Brown <85510829+airbyte-jenny@users.noreply.github.com> Date: Wed, 3 Nov 2021 12:17:52 -0500 Subject: [PATCH 30/83] Improved error handling (#7571) * Improved error handling * Comments --- .../java/io/airbyte/validation/json/JsonSchemaValidator.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 420e60bd1227f..5aad98731a67b 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -66,10 +66,9 @@ public void ensure(final JsonNode schemaJson, final JsonNode objectJson) throws } throw new JsonValidationException(String.format( - "json schema validation failed. \nerrors: %s \nschema: \n%s \nobject: \n%s", + "json schema validation failed when comparing the data to the json schema. \nErrors: %s \nSchema: \n%s", Strings.join(validationMessages, ", "), - schemaJson.toPrettyString(), - objectJson.toPrettyString())); + schemaJson.toPrettyString())); } public void ensureAsRuntime(final JsonNode schemaJson, final JsonNode objectJson) { From 9a07136f2b7e78fd611a3ec12c8ec6db78dc4dd1 Mon Sep 17 00:00:00 2001 From: Jenny Brown <85510829+airbyte-jenny@users.noreply.github.com> Date: Wed, 3 Nov 2021 12:18:53 -0500 Subject: [PATCH 31/83] Fixed compile errors in javadoc content. (#7581) * Fixed compile errors in javadoc content. * Formatting * Clickable docs link for IDE navigation --- .../io/airbyte/commons/logging/MdcScope.java | 2 +- .../commons/util/AutoCloseableIterators.java | 4 ++-- .../java/io/airbyte/config/EnvConfigs.java | 2 ++ .../config/persistence/ConfigRepository.java | 2 +- .../ReadOnlySecretPersistence.java | 2 +- .../db/instance/FlywayMigrationDatabase.java | 5 ++++- .../migrate/migrations/MigrationV0_14_3.java | 2 +- .../oauth/flows/SalesforceOAuthFlow.java | 19 ++++++++++++------- .../scheduler/persistence/JobPersistence.java | 4 +--- .../io/airbyte/server/ConfigDumpExporter.java | 2 +- .../io/airbyte/server/ConfigDumpImporter.java | 4 ++-- .../workers/process/KubePodProcess.java | 3 ++- .../process/KubePortManagerSingleton.java | 4 ---- .../workers/temporal/CancellationHandler.java | 2 -- 14 files changed, 30 insertions(+), 27 deletions(-) diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java b/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java index 190c1e9ebb198..45e52d4495f18 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java @@ -17,7 +17,7 @@ *

  *   
  *     try(final ScopedMDCChange scopedMDCChange = new ScopedMDCChange(
- *      new HashMap() {{
+ *      new HashMap<String, String>() {{
  *        put("my", "value");
  *      }}
  *     )) {
diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java b/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java
index 6154cce7219f5..65a07187cd733 100644
--- a/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java
+++ b/airbyte-commons/src/main/java/io/airbyte/commons/util/AutoCloseableIterators.java
@@ -27,8 +27,8 @@ public static  AutoCloseableIterator fromIterator(final Iterator iterat
   }
 
   /**
-   * Coerces a vanilla {@link Iterator} into a {@link AutoCloseableIterator}. The provided
-   * {@param onClose} function will be called at most one time.
+   * Coerces a vanilla {@link Iterator} into a {@link AutoCloseableIterator}. The provided onClose
+   * function will be called at most one time.
    *
    * @param iterator autocloseable iterator to add another close to
    * @param onClose the function that will be called on close
diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java
index cb61f7710eb8b..6fd0a99849341 100644
--- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java
+++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java
@@ -289,9 +289,11 @@ public String getJobImagePullPolicy() {
   /**
    * Returns worker pod tolerations parsed from its own environment variable. The value of the env is
    * a string that represents one or more tolerations.
+   * 
    *
  • Tolerations are separated by a `;` *
  • Each toleration contains k=v pairs mentioning some/all of key, effect, operator and value and * separated by `,` + *
*

* For example:- The following represents two tolerations, one checking existence and another * matching a value diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java index d7d47242df735..40c5edb5286b4 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java @@ -461,7 +461,7 @@ public void updateConnectionState(final UUID connectionId, final State state) th /** * Converts between a dumpConfig() output and a replaceAllConfigs() input, by deserializing the - * string/jsonnode into the AirbyteConfig, Stream + * string/jsonnode into the AirbyteConfig, Stream<Object<AirbyteConfig.getClassName()>> * * @param configurations from dumpConfig() * @return input suitable for replaceAllConfigs() diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java index 15c4c2b9aff47..68a3cd5626116 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/split_secrets/ReadOnlySecretPersistence.java @@ -8,7 +8,7 @@ /** * Provides a read-only interface to a backing secrets store similar to {@link SecretPersistence}. - * In practice, the functionality should be provided by a {@link SecretPersistence#read function. + * In practice, the functionality should be provided by a {@link SecretPersistence#read} function. */ @FunctionalInterface public interface ReadOnlySecretPersistence { diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java index 6dc841a1ce944..c627b7e82d659 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/FlywayMigrationDatabase.java @@ -17,10 +17,13 @@ /** * Custom database for jOOQ code generation. It performs the following operations: + *

    *
  • Run Flyway migration.
  • *
  • Dump the database schema.
  • *
  • Create a connection for jOOQ code generation.
  • - *

    + *

+ *

+ *

* Reference: https://github.com/sabomichal/jooq-meta-postgres-flyway */ public abstract class FlywayMigrationDatabase extends PostgresDatabase { diff --git a/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java b/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java index b13080542e103..38bb4224f4882 100644 --- a/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java +++ b/airbyte-migration/src/main/java/io/airbyte/migrate/migrations/MigrationV0_14_3.java @@ -23,7 +23,7 @@ import java.util.stream.Stream; /** - * This migration fixes a mistake. We should have done a minor version bump from 0.14.2 => 0.14.3 + * This migration fixes a mistake. We should have done a minor version bump from 0.14.2 to 0.14.3 * but we did not. This migration cleans up any problems that might have arisen from that. Then we * will do another migration to 0.15 forcing everyone to migrate (guaranteeing they hit this one) * and getting into a good state. The only change here is that instead of using StandardDataSchema diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java index 22fdb6dbd7912..c5eec1b59075e 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java @@ -20,24 +20,26 @@ /** * Following docs from - * https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm + * https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm */ public class SalesforceOAuthFlow extends BaseOAuthFlow { + // Clickable link for IDE + // https://help.salesforce.com/s/articleView?language=en_US&id=sf.remoteaccess_oauth_web_server_flow.htm private static final String AUTHORIZE_URL = "https://login.salesforce.com/services/oauth2/authorize"; private static final String ACCESS_TOKEN_URL = "https://login.salesforce.com/services/oauth2/token"; - public SalesforceOAuthFlow(ConfigRepository configRepository) { + public SalesforceOAuthFlow(final ConfigRepository configRepository) { super(configRepository); } @VisibleForTesting - SalesforceOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { + SalesforceOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier stateSupplier) { super(configRepository, httpClient, stateSupplier); } @Override - protected String formatConsentUrl(UUID definitionId, String clientId, String redirectUrl) throws IOException { + protected String formatConsentUrl(final UUID definitionId, final String clientId, final String redirectUrl) throws IOException { try { return new URIBuilder(AUTHORIZE_URL) .addParameter("client_id", clientId) @@ -45,7 +47,7 @@ protected String formatConsentUrl(UUID definitionId, String clientId, String red .addParameter("response_type", "code") .addParameter("state", getState()) .build().toString(); - } catch (URISyntaxException e) { + } catch (final URISyntaxException e) { throw new IOException("Failed to format Consent URL for OAuth flow", e); } } @@ -56,7 +58,10 @@ protected String getAccessTokenUrl() { } @Override - protected Map getAccessTokenQueryParameters(String clientId, String clientSecret, String authCode, String redirectUrl) { + protected Map getAccessTokenQueryParameters(final String clientId, + final String 
clientSecret, + final String authCode, + final String redirectUrl) { return ImmutableMap.builder() .putAll(super.getAccessTokenQueryParameters(clientId, clientSecret, authCode, redirectUrl)) .put("grant_type", "authorization_code") @@ -64,7 +69,7 @@ protected Map getAccessTokenQueryParameters(String clientId, Str } @Override - protected Map extractRefreshToken(JsonNode data, String accessTokenUrl) throws IOException { + protected Map extractRefreshToken(final JsonNode data, final String accessTokenUrl) throws IOException { System.out.println(Jsons.serialize(data)); if (data.has("refresh_token")) { final String refreshToken = data.get("refresh_token").asText(); diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java index d784c1f3f51ca..023ffda55e43b 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java @@ -125,7 +125,7 @@ public interface JobPersistence { void writeOutput(long jobId, int attemptNumber, T output) throws IOException; /** - * @param configType - type of config, e.g. sync + * @param configTypes - type of config, e.g. sync * @param configId - id of that config * @return lists job in descending order by created_at * @throws IOException - what you do when you IO @@ -196,8 +196,6 @@ public interface JobPersistence { /** * Purges job history while ensuring that the latest saved-state information is maintained. - * - * @throws IOException */ void purgeJobHistory(); diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java index 8c3ffed22ab47..b10aeda10272d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpExporter.java @@ -231,7 +231,7 @@ private Collection listDestinationDefinition(fina } /** - * List all configurations of type @param that already exists + * List all configurations of type @param <T> that already exists */ public interface ListConfigCall { diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java index 9abf855011050..de7ac4084d47f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigDumpImporter.java @@ -596,8 +596,8 @@ private Map importIntoWorkspace(final ConfigSchema configSchema, } /** - * List all configurations of type @param that already exists (we'll be using this to know which - * ids are already in use) + * List all configurations of type @param <T> that already exists (we'll be using this to know + * which ids are already in use) */ public interface ListConfigCall { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java index ca2c7aba0fe53..3937cf80d8d0a 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePodProcess.java @@ -58,6 +58,7 @@ * stderr streams and copy configuration files over. * * This is made possible by: + *
    *
  • 1) An init container that creates 3 named pipes corresponding to stdin, stdout and std err on * a shared volume.
  • *
  • 2) Config files (e.g. config.json, catalog.json etc) are copied from the parent process into @@ -76,7 +77,7 @@ * handling.
  • *
  • 8) A heartbeat sidecar checks if the worker that launched the pod is still alive. If not, the * pod will fail.
  • - * + *
* The docker image used for this pod process must expose a AIRBYTE_ENTRYPOINT which contains the * entrypoint we will wrap when creating the main container in the pod. * diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java index 6e58a12d5d746..6583c4becc62f 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/KubePortManagerSingleton.java @@ -36,8 +36,6 @@ private KubePortManagerSingleton(final Set ports) { /** * Make sure init(ports) is called once prior to repeatedly using getInstance(). - * - * @return */ public static synchronized KubePortManagerSingleton getInstance() { if (instance == null) { @@ -49,8 +47,6 @@ public static synchronized KubePortManagerSingleton getInstance() { /** * Sets up the port range; make sure init(ports) is called once prior to repeatedly using * getInstance(). - * - * @return */ public static synchronized void init(final Set ports) { if (instance != null) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java index 93645a0abb522..090cc0c107f8d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/CancellationHandler.java @@ -4,7 +4,6 @@ package io.airbyte.workers.temporal; -import io.airbyte.workers.WorkerException; import io.temporal.activity.Activity; import io.temporal.activity.ActivityExecutionContext; import io.temporal.client.ActivityCompletionException; @@ -35,7 +34,6 @@ public TemporalCancellationHandler() { * * @param onCancellationCallback a runnable that will only run when Temporal indicates the activity * should be killed (cancellation or timeout). 
- * @throws WorkerException */ @Override public void checkAndHandleCancellation(final Runnable onCancellationCallback) { From c9c41dcd08d80fc424910de393d10b5f803cdca8 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Wed, 3 Nov 2021 11:37:24 -0700 Subject: [PATCH 32/83] =?UTF-8?q?=F0=9F=8E=89=20Destination=20S3=20&=20GCS?= =?UTF-8?q?:=20support=20additional=20properties=20(#7288)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Log json schema * Use patched json avro converter * Rename schema * Update unit test cases * Fix ab ap field schema conversion * Rename files * Add unit test cases * Fix dependency for databricks * Bump versions * Update documentations * Update gcs doc * Set additional properties field name * Revert s3 and gcs version * Specify extra props fields * Refactor json avro conversion doc * Update connector doc * Fix databricks spec typo * Bump connector versions in seed --- .../4816b78f-1489-44c1-9060-4b19d5fa9362.json | 2 +- .../ca8f6566-e555-4b40-943a-545bf123117a.json | 2 +- .../seed/destination_definitions.yaml | 4 +- .../destination-databricks/Dockerfile | 2 +- .../destination-databricks/build.gradle | 6 +- .../databricks/DatabricksStreamCopier.java | 2 +- .../src/main/resources/spec.json | 2 +- .../connectors/destination-gcs/Dockerfile | 2 +- .../connectors/destination-gcs/build.gradle | 6 +- .../destination/gcs/avro/GcsAvroWriter.java | 6 +- .../gcs/parquet/GcsParquetWriter.java | 25 +- .../gcs/writer/ProductionWriterFactory.java | 13 +- .../gcs/GcsAvroDestinationAcceptanceTest.java | 6 +- .../gcs/GcsDestinationAcceptanceTest.java | 5 +- .../GcsParquetDestinationAcceptanceTest.java | 6 +- .../connectors/destination-s3/Dockerfile | 2 +- .../connectors/destination-s3/build.gradle | 6 +- .../destination/s3/avro/AvroConstants.java | 26 ++ .../AvroNameTransformer.java} | 4 +- .../s3/avro/AvroRecordFactory.java | 13 +- .../s3/avro/JsonFieldNameUpdater.java | 22 +- .../s3/avro/JsonToAvroSchemaConverter.java | 53 ++- .../destination/s3/avro/S3AvroConstants.java | 13 - .../destination/s3/avro/S3AvroWriter.java | 5 +- .../s3/parquet/S3ParquetWriter.java | 14 +- .../s3/writer/ProductionWriterFactory.java | 12 +- .../s3/S3AvroDestinationAcceptanceTest.java | 5 +- .../s3/S3DestinationAcceptanceTest.java | 5 +- .../S3ParquetDestinationAcceptanceTest.java | 6 +- .../s3/avro/JsonFieldNameUpdaterTest.java | 1 - ...Test.java => JsonToAvroConverterTest.java} | 53 ++- ...a.json => json_conversion_test_cases.json} | 344 +++++++++++++++++- ...e.json => type_conversion_test_cases.json} | 48 ++- docs/SUMMARY.md | 1 + docs/integrations/destinations/databricks.md | 3 +- docs/integrations/destinations/gcs.md | 162 +-------- docs/integrations/destinations/s3.md | 162 +-------- .../json-avro-conversion.md | 231 ++++++++++++ settings.gradle | 6 + 39 files changed, 812 insertions(+), 474 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java rename airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/{S3NameTransformer.java => avro/AvroNameTransformer.java} (83%) delete mode 100644 airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java rename airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/{JsonToAvroSchemaConverterTest.java => JsonToAvroConverterTest.java} (60%) rename 
airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/{get_avro_schema.json => json_conversion_test_cases.json} (51%) rename airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/{get_field_type.json => type_conversion_test_cases.json} (66%) create mode 100644 docs/understanding-airbyte/json-avro-conversion.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json index 42a5b5150bf8d..9feab0c077547 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/4816b78f-1489-44c1-9060-4b19d5fa9362.json @@ -2,6 +2,6 @@ "destinationDefinitionId": "4816b78f-1489-44c1-9060-4b19d5fa9362", "name": "S3", "dockerRepository": "airbyte/destination-s3", - "dockerImageTag": "0.1.12", + "dockerImageTag": "0.1.13", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/s3" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json index 89078e4019d88..b7e6e4fff2655 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ca8f6566-e555-4b40-943a-545bf123117a.json @@ -2,6 +2,6 @@ "destinationDefinitionId": "ca8f6566-e555-4b40-943a-545bf123117a", "name": "Google Cloud Storage (GCS)", "dockerRepository": "airbyte/destination-gcs", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/gcs" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 69f4711d2270c..f28327960f430 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -31,7 +31,7 @@ - name: Google Cloud Storage (GCS) destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a dockerRepository: airbyte/destination-gcs - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs - name: Google PubSub destinationDefinitionId: 356668e2-7e34-47f3-a3b0-67a8a481b692 @@ -93,7 +93,7 @@ - name: S3 destinationDefinitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362 dockerRepository: airbyte/destination-s3 - dockerImageTag: 0.1.12 + dockerImageTag: 0.1.13 documentationUrl: https://docs.airbyte.io/integrations/destinations/s3 - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba diff --git a/airbyte-integrations/connectors/destination-databricks/Dockerfile b/airbyte-integrations/connectors/destination-databricks/Dockerfile index 6f09d59bf9b5b..1ef415915e481 100644 --- a/airbyte-integrations/connectors/destination-databricks/Dockerfile +++ b/airbyte-integrations/connectors/destination-databricks/Dockerfile @@ -7,5 +7,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar 
xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-databricks diff --git a/airbyte-integrations/connectors/destination-databricks/build.gradle b/airbyte-integrations/connectors/destination-databricks/build.gradle index 24f6b9a9f062c..a685f9655a40c 100644 --- a/airbyte-integrations/connectors/destination-databricks/build.gradle +++ b/airbyte-integrations/connectors/destination-databricks/build.gradle @@ -24,7 +24,11 @@ dependencies { implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0' implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '3.3.0' implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0' - implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10' + implementation('tech.allegro.schema.json2avro:converter') { + version { + branch = 'master' + } + } integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-databricks') diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java index f72c10ad6177c..3f58adbdc4637 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java @@ -90,7 +90,7 @@ public DatabricksStreamCopier(final String stagingFolder, s3Config.getBucketName(), s3Config.getBucketPath(), databricksConfig.getDatabaseSchema(), streamName); LOGGER.info("[Stream {}] Database schema: {}", streamName, schemaName); - LOGGER.info("[Stream {}] Parquet schema: {}", streamName, parquetWriter.getParquetSchema()); + LOGGER.info("[Stream {}] Parquet schema: {}", streamName, parquetWriter.getSchema()); LOGGER.info("[Stream {}] Tmp table {} location: {}", streamName, tmpTableName, tmpTableLocation); LOGGER.info("[Stream {}] Data table {} location: {}", streamName, destTableName, destTableLocation); diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json index 14e72127fe02a..4112fb86fd89e 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json @@ -84,7 +84,7 @@ "examples": ["airbyte.staging"] }, "s3_bucket_path": { - "Title": "S3 Bucket Path", + "title": "S3 Bucket Path", "type": "string", "description": "The directory under the S3 bucket where data will be written.", "examples": ["data_sync/test"] diff --git a/airbyte-integrations/connectors/destination-gcs/Dockerfile b/airbyte-integrations/connectors/destination-gcs/Dockerfile index f4141aa02fb79..af9f3aadbad4e 100644 --- a/airbyte-integrations/connectors/destination-gcs/Dockerfile +++ b/airbyte-integrations/connectors/destination-gcs/Dockerfile @@ -7,5 +7,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar 
--strip-components=1 -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/destination-gcs diff --git a/airbyte-integrations/connectors/destination-gcs/build.gradle b/airbyte-integrations/connectors/destination-gcs/build.gradle index bf27f8686edc6..36c46d80ffc5b 100644 --- a/airbyte-integrations/connectors/destination-gcs/build.gradle +++ b/airbyte-integrations/connectors/destination-gcs/build.gradle @@ -30,7 +30,11 @@ dependencies { implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0' implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '3.3.0' implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0' - implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10' + implementation('tech.allegro.schema.json2avro:converter') { + version { + branch = 'master' + } + } testImplementation 'org.apache.commons:commons-lang3:3.11' diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java index f8b4dcaa1065d..49c87bd72e5d9 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/avro/GcsAvroWriter.java @@ -11,7 +11,6 @@ import io.airbyte.integrations.destination.gcs.writer.BaseGcsWriter; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.avro.S3AvroFormatConfig; import io.airbyte.integrations.destination.s3.util.S3StreamTransferManagerHelper; import io.airbyte.integrations.destination.s3.writer.S3Writer; @@ -27,6 +26,7 @@ import org.apache.avro.generic.GenericDatumWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class GcsAvroWriter extends BaseGcsWriter implements S3Writer { @@ -42,7 +42,7 @@ public GcsAvroWriter(final GcsDestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws IOException { super(config, s3Client, configuredStream); @@ -52,7 +52,7 @@ public GcsAvroWriter(final GcsDestinationConfig config, LOGGER.info("Full GCS path for stream '{}': {}/{}", stream.getName(), config.getBucketName(), objectKey); - this.avroRecordFactory = new AvroRecordFactory(schema, nameUpdater); + this.avroRecordFactory = new AvroRecordFactory(schema, converter); this.uploadManager = S3StreamTransferManagerHelper.getDefault( config.getBucketName(), objectKey, s3Client, config.getFormatConfig().getPartSize()); // We only need one output stream as we only have one input stream. This is reasonably performant. 
diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java index 536b014e93392..e72e3613108d3 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/parquet/GcsParquetWriter.java @@ -5,16 +5,13 @@ package io.airbyte.integrations.destination.gcs.parquet; import com.amazonaws.services.s3.AmazonS3; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.databind.node.ObjectNode; -import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.gcs.GcsDestinationConfig; import io.airbyte.integrations.destination.gcs.credential.GcsHmacKeyCredentialConfig; import io.airbyte.integrations.destination.gcs.writer.BaseGcsWriter; import io.airbyte.integrations.destination.s3.S3Format; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; +import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; import io.airbyte.integrations.destination.s3.parquet.S3ParquetFormatConfig; import io.airbyte.integrations.destination.s3.writer.S3Writer; import io.airbyte.protocol.models.AirbyteRecordMessage; @@ -42,21 +39,17 @@ public class GcsParquetWriter extends BaseGcsWriter implements S3Writer { private static final ObjectMapper MAPPER = new ObjectMapper(); private static final ObjectWriter WRITER = MAPPER.writer(); - private final Schema schema; - private final JsonFieldNameUpdater nameUpdater; private final ParquetWriter parquetWriter; - private final JsonAvroConverter converter = new JsonAvroConverter(); + private final AvroRecordFactory avroRecordFactory; public GcsParquetWriter(final GcsDestinationConfig config, final AmazonS3 s3Client, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws URISyntaxException, IOException { super(config, s3Client, configuredStream); - this.schema = schema; - this.nameUpdater = nameUpdater; final String outputFilename = BaseGcsWriter.getOutputFilename(uploadTimestamp, S3Format.PARQUET); final String objectKey = String.join("/", outputPrefix, outputFilename); @@ -78,6 +71,7 @@ public GcsParquetWriter(final GcsDestinationConfig config, .withDictionaryPageSize(formatConfig.getDictionaryPageSize()) .withDictionaryEncoding(formatConfig.isDictionaryEncoding()) .build(); + this.avroRecordFactory = new AvroRecordFactory(schema, converter); } public static Configuration getHadoopConfig(final GcsDestinationConfig config) { @@ -99,16 +93,7 @@ public static Configuration getHadoopConfig(final GcsDestinationConfig config) { @Override public void write(final UUID id, final AirbyteRecordMessage recordMessage) throws IOException { - JsonNode inputData = recordMessage.getData(); - inputData = nameUpdater.getJsonWithStandardizedFieldNames(inputData); - - final ObjectNode jsonRecord = MAPPER.createObjectNode(); - jsonRecord.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString()); - jsonRecord.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, 
recordMessage.getEmittedAt()); - jsonRecord.setAll((ObjectNode) inputData); - - final GenericData.Record avroRecord = converter.convertToGenericDataRecord(WRITER.writeValueAsBytes(jsonRecord), schema); - parquetWriter.write(avroRecord); + parquetWriter.write(avroRecordFactory.getAvroRecord(id, recordMessage)); } @Override diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java index bf76b36d78513..45d1e334d2dcb 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/writer/ProductionWriterFactory.java @@ -11,7 +11,7 @@ import io.airbyte.integrations.destination.gcs.jsonl.GcsJsonlWriter; import io.airbyte.integrations.destination.gcs.parquet.GcsParquetWriter; import io.airbyte.integrations.destination.s3.S3Format; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonToAvroSchemaConverter; import io.airbyte.integrations.destination.s3.writer.S3Writer; import io.airbyte.protocol.models.AirbyteStream; @@ -35,20 +35,17 @@ public S3Writer create(final GcsDestinationConfig config, if (format == S3Format.AVRO || format == S3Format.PARQUET) { final AirbyteStream stream = configuredStream.getStream(); + LOGGER.info("Json schema for stream {}: {}", stream.getName(), stream.getJsonSchema()); final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace(), true); - final JsonFieldNameUpdater nameUpdater = new JsonFieldNameUpdater(schemaConverter.getStandardizedNames()); - LOGGER.info("Paquet schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); - if (nameUpdater.hasNameUpdate()) { - LOGGER.info("The following field names will be standardized: {}", nameUpdater); - } + LOGGER.info("Avro schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); if (format == S3Format.AVRO) { - return new GcsAvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new GcsAvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } else { - return new GcsParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new GcsParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } } diff --git a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java index d8ead36743695..a62a4e7f0b2c0 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsAvroDestinationAcceptanceTest.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.ObjectReader; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; import java.util.LinkedList; @@ -19,12 +20,9 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericDatumReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class GcsAvroDestinationAcceptanceTest extends GcsDestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected GcsAvroDestinationAcceptanceTest() { super(S3Format.AVRO); } @@ -56,7 +54,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); while (dataFileReader.hasNext()) { final GenericData.Record record = dataFileReader.next(); - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java index 5072da17dd29f..13bc2ec2c3791 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsDestinationAcceptanceTest.java @@ -4,8 +4,6 @@ package io.airbyte.integrations.destination.gcs; -import static io.airbyte.integrations.destination.s3.S3DestinationConstants.NAME_TRANSFORMER; - import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; import com.amazonaws.services.s3.model.S3ObjectSummary; @@ -15,6 +13,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.S3DestinationConstants; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.S3FormatConfig; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; @@ -89,7 +88,7 @@ protected List getAllSyncedObjects(final String streamName, fin .listObjects(config.getBucketName(), outputPrefix) .getObjectSummaries() .stream() - .filter(o -> o.getKey().contains(NAME_TRANSFORMER.convertStreamName(streamName) + "/")) + .filter(o -> o.getKey().contains(S3DestinationConstants.NAME_TRANSFORMER.convertStreamName(streamName) + "/")) .sorted(Comparator.comparingLong(o -> o.getLastModified().getTime())) .collect(Collectors.toList()); LOGGER.info( diff --git 
a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java index f04c3bb3b3fb3..6db884528f298 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-gcs/src/test-integration/java/io/airbyte/integrations/destination/gcs/GcsParquetDestinationAcceptanceTest.java @@ -11,6 +11,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.destination.gcs.parquet.GcsParquetWriter; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; import java.io.IOException; @@ -22,12 +23,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class GcsParquetDestinationAcceptanceTest extends GcsDestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected GcsParquetDestinationAcceptanceTest() { super(S3Format.PARQUET); } @@ -63,7 +61,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); GenericData.Record record; while ((record = parquetReader.read()) != null) { - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-s3/Dockerfile b/airbyte-integrations/connectors/destination-s3/Dockerfile index c27f60d88243e..aab1915a9fbdc 100644 --- a/airbyte-integrations/connectors/destination-s3/Dockerfile +++ b/airbyte-integrations/connectors/destination-s3/Dockerfile @@ -7,5 +7,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.12 +LABEL io.airbyte.version=0.1.13 LABEL io.airbyte.name=airbyte/destination-s3 diff --git a/airbyte-integrations/connectors/destination-s3/build.gradle b/airbyte-integrations/connectors/destination-s3/build.gradle index 6900ba3e8112c..547e83765d541 100644 --- a/airbyte-integrations/connectors/destination-s3/build.gradle +++ b/airbyte-integrations/connectors/destination-s3/build.gradle @@ -26,7 +26,11 @@ dependencies { implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0' implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '3.3.0' implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0' - implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10' + implementation('tech.allegro.schema.json2avro:converter') { + version { + branch = 'master' + } + } testImplementation 'org.apache.commons:commons-lang3:3.11' diff --git 
a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java new file mode 100644 index 0000000000000..50b9012fbbd99 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroConstants.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.s3.avro; + +import java.util.Set; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; + +public class AvroConstants { + + // Field name with special character + public static final String DOC_KEY_VALUE_DELIMITER = ":"; + public static final String DOC_KEY_ORIGINAL_NAME = "_airbyte_original_name"; + + public static final String AVRO_EXTRA_PROPS_FIELD = "_airbyte_additional_properties"; + // This set must include _ab_additional_col in source_s3/source_files_abstract/stream.py + public static final Set JSON_EXTRA_PROPS_FIELDS = Set.of("_ab_additional_properties", AVRO_EXTRA_PROPS_FIELD); + public static final AvroNameTransformer NAME_TRANSFORMER = new AvroNameTransformer(); + public static final JsonAvroConverter JSON_CONVERTER = JsonAvroConverter.builder() + .setNameTransformer(NAME_TRANSFORMER::getIdentifier) + .setJsonAdditionalPropsFieldNames(JSON_EXTRA_PROPS_FIELDS) + .setAvroAdditionalPropsFieldName(AVRO_EXTRA_PROPS_FIELD) + .build(); + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3NameTransformer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java similarity index 83% rename from airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3NameTransformer.java rename to airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java index 936dc6b27c1ec..c1dc15a076d14 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3NameTransformer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java @@ -2,11 +2,11 @@ * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.integrations.destination.s3; +package io.airbyte.integrations.destination.s3.avro; import io.airbyte.integrations.destination.ExtendedNameTransformer; -public class S3NameTransformer extends ExtendedNameTransformer { +public class AvroNameTransformer extends ExtendedNameTransformer { @Override protected String applyDefaultCase(final String input) { diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java index 94611e32bcf4b..791df02105424 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroRecordFactory.java @@ -5,7 +5,6 @@ package io.airbyte.integrations.destination.s3.avro; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -23,22 +22,18 @@ public class AvroRecordFactory { private static final ObjectWriter WRITER = MAPPER.writer(); private final Schema schema; - private final JsonFieldNameUpdater nameUpdater; - private final JsonAvroConverter converter = new JsonAvroConverter(); + private final JsonAvroConverter converter; - public AvroRecordFactory(final Schema schema, final JsonFieldNameUpdater nameUpdater) { + public AvroRecordFactory(final Schema schema, final JsonAvroConverter converter) { this.schema = schema; - this.nameUpdater = nameUpdater; + this.converter = converter; } public GenericData.Record getAvroRecord(final UUID id, final AirbyteRecordMessage recordMessage) throws JsonProcessingException { - JsonNode inputData = recordMessage.getData(); - inputData = nameUpdater.getJsonWithStandardizedFieldNames(inputData); - final ObjectNode jsonRecord = MAPPER.createObjectNode(); jsonRecord.put(JavaBaseConstants.COLUMN_NAME_AB_ID, id.toString()); jsonRecord.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, recordMessage.getEmittedAt()); - jsonRecord.setAll((ObjectNode) inputData); + jsonRecord.setAll((ObjectNode) recordMessage.getData()); return converter.convertToGenericDataRecord(WRITER.writeValueAsBytes(jsonRecord), schema); } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java index e6386f2beb52f..1a4377b3bbc05 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdater.java @@ -10,8 +10,9 @@ import java.util.Map; /** - * This helper class tracks whether a Json has special field name that needs to be replaced with a - * standardized one, and can perform the replacement when necessary. + * This helper class is for testing only. It tracks the original and standardized names, and revert + * them when necessary, so that the tests can correctly compare the generated json with the original + * input. 
*/ public class JsonFieldNameUpdater { @@ -22,23 +23,8 @@ public JsonFieldNameUpdater(final Map standardizedNames) { this.standardizedNames = ImmutableMap.copyOf(standardizedNames); } - public boolean hasNameUpdate() { - return standardizedNames.size() > 0; - } - - public JsonNode getJsonWithStandardizedFieldNames(final JsonNode input) { - if (!hasNameUpdate()) { - return input; - } - String jsonString = Jsons.serialize(input); - for (final Map.Entry entry : standardizedNames.entrySet()) { - jsonString = jsonString.replaceAll(quote(entry.getKey()), quote(entry.getValue())); - } - return Jsons.deserialize(jsonString); - } - public JsonNode getJsonWithOriginalFieldNames(final JsonNode input) { - if (!hasNameUpdate()) { + if (standardizedNames.size() == 0) { return input; } String jsonString = Jsons.serialize(input); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index dd271277b482e..7280b89cbc59a 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -9,7 +9,6 @@ import com.google.common.base.Preconditions; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.integrations.destination.s3.S3NameTransformer; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -21,28 +20,28 @@ import javax.annotation.Nullable; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; -import org.apache.avro.Schema.Type; import org.apache.avro.SchemaBuilder; import org.apache.avro.SchemaBuilder.RecordBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.AdditionalPropertyField; /** * The main function of this class is to convert a JsonSchema to Avro schema. It can also * standardize schema names, and keep track of a mapping from the original names to the standardized - * ones. + * ones, which is needed for unit tests. *

* For limitations of this converter, see the README of this connector: * https://docs.airbyte.io/integrations/destinations/s3#avro */ public class JsonToAvroSchemaConverter { - public static final Schema UUID_SCHEMA = LogicalTypes.uuid() - .addToSchema(Schema.create(Type.STRING)); + private static final Schema UUID_SCHEMA = LogicalTypes.uuid() + .addToSchema(Schema.create(Schema.Type.STRING)); + private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); private static final Logger LOGGER = LoggerFactory.getLogger(JsonToAvroSchemaConverter.class); private static final Schema TIMESTAMP_MILLIS_SCHEMA = LogicalTypes.timestampMillis() - .addToSchema(Schema.create(Type.LONG)); - private static final S3NameTransformer NAME_TRANSFORMER = new S3NameTransformer(); + .addToSchema(Schema.create(Schema.Type.LONG)); private final Map standardizedNames = new HashMap<>(); @@ -99,7 +98,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, final String name, @Nullable final String namespace, final boolean appendAirbyteFields) { - final String stdName = NAME_TRANSFORMER.getIdentifier(name); + final String stdName = AvroConstants.NAME_TRANSFORMER.getIdentifier(name); RecordBuilder builder = SchemaBuilder.record(stdName); if (!stdName.equals(name)) { standardizedNames.put(name, stdName); @@ -107,8 +106,8 @@ public Schema getAvroSchema(final JsonNode jsonSchema, stdName); builder = builder.doc( String.format("%s%s%s", - S3AvroConstants.DOC_KEY_ORIGINAL_NAME, - S3AvroConstants.DOC_KEY_VALUE_DELIMITER, + AvroConstants.DOC_KEY_ORIGINAL_NAME, + AvroConstants.DOC_KEY_VALUE_DELIMITER, name)); } if (namespace != null) { @@ -116,7 +115,11 @@ public Schema getAvroSchema(final JsonNode jsonSchema, } final JsonNode properties = jsonSchema.get("properties"); - final List fieldNames = new ArrayList<>(MoreIterators.toList(properties.fieldNames())); + // object field with no "properties" will be handled by the default additional properties + // field during object conversion; so it is fine if there is no "properties" + final List fieldNames = properties == null + ? 
Collections.emptyList() + : new ArrayList<>(MoreIterators.toList(properties.fieldNames())); SchemaBuilder.FieldAssembler assembler = builder.fields(); @@ -127,7 +130,13 @@ public Schema getAvroSchema(final JsonNode jsonSchema, } for (final String fieldName : fieldNames) { - final String stdFieldName = NAME_TRANSFORMER.getIdentifier(fieldName); + // ignore additional properties fields, which will be consolidated + // into one field at the end + if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(fieldName)) { + continue; + } + + final String stdFieldName = AvroConstants.NAME_TRANSFORMER.getIdentifier(fieldName); final JsonNode fieldDefinition = properties.get(fieldName); SchemaBuilder.FieldBuilder fieldBuilder = assembler.name(stdFieldName); if (!stdFieldName.equals(fieldName)) { @@ -135,14 +144,18 @@ public Schema getAvroSchema(final JsonNode jsonSchema, LOGGER.warn("Field name contains illegal character(s) and is standardized: {} -> {}", fieldName, stdFieldName); fieldBuilder = fieldBuilder.doc(String.format("%s%s%s", - S3AvroConstants.DOC_KEY_ORIGINAL_NAME, - S3AvroConstants.DOC_KEY_VALUE_DELIMITER, + AvroConstants.DOC_KEY_ORIGINAL_NAME, + AvroConstants.DOC_KEY_VALUE_DELIMITER, fieldName)); } assembler = fieldBuilder.type(getNullableFieldTypes(fieldName, fieldDefinition)) .withDefault(null); } + // support additional properties in one field + assembler = assembler.name(AvroConstants.AVRO_EXTRA_PROPS_FIELD) + .type(AdditionalPropertyField.FIELD_SCHEMA).withDefault(null); + return assembler.endRecord(); } @@ -150,6 +163,12 @@ Schema getSingleFieldType(final String fieldName, final JsonSchemaType fieldType Preconditions .checkState(fieldType != JsonSchemaType.NULL, "Null types should have been filtered out"); + // the additional properties fields are filtered out and never passed into this method; + // but this method is able to handle them for completeness + if (AvroConstants.JSON_EXTRA_PROPS_FIELDS.contains(fieldName)) { + return AdditionalPropertyField.FIELD_SCHEMA; + } + final Schema fieldSchema; switch (fieldType) { case STRING, NUMBER, INTEGER, BOOLEAN -> fieldSchema = Schema.create(fieldType.getAvroType()); @@ -166,7 +185,7 @@ Schema getSingleFieldType(final String fieldName, final JsonSchemaType fieldType fieldSchema = Schema.createArray(getNullableFieldTypes(String.format("%s.items", fieldName), items)); } else if (items.isArray()) { final List arrayElementTypes = getSchemasFromTypes(fieldName, (ArrayNode) items); - arrayElementTypes.add(0, Schema.create(Type.NULL)); + arrayElementTypes.add(0, NULL_SCHEMA); fieldSchema = Schema.createArray(Schema.createUnion(arrayElementTypes)); } else { throw new IllegalStateException( @@ -217,7 +236,9 @@ Schema getNullableFieldTypes(final String fieldName, final JsonNode fieldDefinit return Schema.create(Schema.Type.NULL); } else { // Mark every field as nullable to prevent missing value exceptions from Avro / Parquet. 
- nonNullFieldTypes.add(0, Schema.create(Schema.Type.NULL)); + if (!nonNullFieldTypes.contains(NULL_SCHEMA)) { + nonNullFieldTypes.add(0, NULL_SCHEMA); + } return Schema.createUnion(nonNullFieldTypes); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java deleted file mode 100644 index 72be89baa4d80..0000000000000 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroConstants.java +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.destination.s3.avro; - -public class S3AvroConstants { - - // Field name with special character - public static final String DOC_KEY_VALUE_DELIMITER = ":"; - public static final String DOC_KEY_ORIGINAL_NAME = "_airbyte_original_name"; - -} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java index e9e379d97e40a..7448a48b79084 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java @@ -24,6 +24,7 @@ import org.apache.avro.generic.GenericDatumWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3AvroWriter extends BaseS3Writer implements S3Writer { @@ -39,7 +40,7 @@ public S3AvroWriter(final S3DestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws IOException { super(config, s3Client, configuredStream); @@ -49,7 +50,7 @@ public S3AvroWriter(final S3DestinationConfig config, LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), objectKey); - this.avroRecordFactory = new AvroRecordFactory(schema, nameUpdater); + this.avroRecordFactory = new AvroRecordFactory(schema, converter); this.uploadManager = S3StreamTransferManagerHelper.getDefault( config.getBucketName(), objectKey, s3Client, config.getFormatConfig().getPartSize()); // We only need one output stream as we only have one input stream. This is reasonably performant. 
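For context, here is a minimal, illustrative sketch (not part of this patch) of the record shape the updated JsonToAvroSchemaConverter targets: every declared field becomes a nullable union, and a single `_airbyte_additional_properties` map field is appended to hold any undeclared keys. The record and field names below are hypothetical, and the snippet assumes only Apache Avro on the classpath.

```java
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class AdditionalPropsSchemaSketch {

  public static void main(final String[] args) {
    // Shape produced for a JSON schema with one nullable string property ("node_id"):
    // the declared field is a ["null", "string"] union, and one extra nullable
    // map<string, string> field collects any additionalProperties keys.
    final Schema schema = SchemaBuilder.record("simple_schema").namespace("example")
        .fields()
        .name("node_id")
        .type().unionOf().nullType().and().stringType().endUnion()
        .nullDefault()
        .name("_airbyte_additional_properties")
        .type().unionOf().nullType().and().map().values().stringType().endUnion()
        .nullDefault()
        .endRecord();

    System.out.println(schema.toString(true));
  }
}
```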
diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java index 88f658758541f..65afc0805f14b 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java @@ -8,7 +8,6 @@ import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; -import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.writer.BaseS3Writer; import io.airbyte.integrations.destination.s3.writer.S3Writer; import io.airbyte.protocol.models.AirbyteRecordMessage; @@ -29,6 +28,7 @@ import org.apache.parquet.hadoop.util.HadoopOutputFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3ParquetWriter extends BaseS3Writer implements S3Writer { @@ -36,7 +36,7 @@ public class S3ParquetWriter extends BaseS3Writer implements S3Writer { private final ParquetWriter parquetWriter; private final AvroRecordFactory avroRecordFactory; - private final Schema parquetSchema; + private final Schema schema; private final String outputFilename; public S3ParquetWriter(final S3DestinationConfig config, @@ -44,7 +44,7 @@ public S3ParquetWriter(final S3DestinationConfig config, final ConfiguredAirbyteStream configuredStream, final Timestamp uploadTimestamp, final Schema schema, - final JsonFieldNameUpdater nameUpdater) + final JsonAvroConverter converter) throws URISyntaxException, IOException { super(config, s3Client, configuredStream); @@ -69,8 +69,8 @@ public S3ParquetWriter(final S3DestinationConfig config, .withDictionaryPageSize(formatConfig.getDictionaryPageSize()) .withDictionaryEncoding(formatConfig.isDictionaryEncoding()) .build(); - this.avroRecordFactory = new AvroRecordFactory(schema, nameUpdater); - this.parquetSchema = schema; + this.avroRecordFactory = new AvroRecordFactory(schema, converter); + this.schema = schema; } public static Configuration getHadoopConfig(final S3DestinationConfig config) { @@ -88,8 +88,8 @@ public static Configuration getHadoopConfig(final S3DestinationConfig config) { return hadoopConfig; } - public Schema getParquetSchema() { - return parquetSchema; + public Schema getSchema() { + return schema; } /** diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java index f91c6d244cf5b..39041093271c6 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/ProductionWriterFactory.java @@ -7,7 +7,7 @@ import com.amazonaws.services.s3.AmazonS3; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; -import 
io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonToAvroSchemaConverter; import io.airbyte.integrations.destination.s3.avro.S3AvroWriter; import io.airbyte.integrations.destination.s3.csv.S3CsvWriter; @@ -34,19 +34,17 @@ public S3Writer create(final S3DestinationConfig config, if (format == S3Format.AVRO || format == S3Format.PARQUET) { final AirbyteStream stream = configuredStream.getStream(); + LOGGER.info("Json schema for stream {}: {}", stream.getName(), stream.getJsonSchema()); + final JsonToAvroSchemaConverter schemaConverter = new JsonToAvroSchemaConverter(); final Schema avroSchema = schemaConverter.getAvroSchema(stream.getJsonSchema(), stream.getName(), stream.getNamespace(), true); - final JsonFieldNameUpdater nameUpdater = new JsonFieldNameUpdater(schemaConverter.getStandardizedNames()); LOGGER.info("Avro schema for stream {}: {}", stream.getName(), avroSchema.toString(false)); - if (nameUpdater.hasNameUpdate()) { - LOGGER.info("The following field names will be standardized: {}", nameUpdater); - } if (format == S3Format.AVRO) { - return new S3AvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new S3AvroWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } else { - return new S3ParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, nameUpdater); + return new S3ParquetWriter(config, s3Client, configuredStream, uploadTimestamp, avroSchema, AvroConstants.JSON_CONVERTER); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java index 9352a73750fdc..9626c7ed5f8d0 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectReader; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; import java.util.LinkedList; @@ -22,8 +23,6 @@ public class S3AvroDestinationAcceptanceTest extends S3DestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected S3AvroDestinationAcceptanceTest() { super(S3Format.AVRO); } @@ -55,7 +54,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); while (dataFileReader.hasNext()) { final GenericData.Record record = dataFileReader.next(); - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git 
a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java index 8d555fd27738e..e382c3b634a60 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3DestinationAcceptanceTest.java @@ -4,8 +4,6 @@ package io.airbyte.integrations.destination.s3; -import static io.airbyte.integrations.destination.s3.S3DestinationConstants.NAME_TRANSFORMER; - import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; @@ -17,6 +15,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.nio.file.Path; @@ -88,7 +87,7 @@ protected List getAllSyncedObjects(final String streamName, fin .listObjects(config.getBucketName(), outputPrefix) .getObjectSummaries() .stream() - .filter(o -> o.getKey().contains(NAME_TRANSFORMER.convertStreamName(streamName) + "/")) + .filter(o -> o.getKey().contains(AvroConstants.NAME_TRANSFORMER.convertStreamName(streamName) + "/")) .sorted(Comparator.comparingLong(o -> o.getLastModified().getTime())) .collect(Collectors.toList()); LOGGER.info( diff --git a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java index f20b01623ecdf..bec71d1883ab3 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3ParquetDestinationAcceptanceTest.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectReader; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.s3.avro.AvroConstants; import io.airbyte.integrations.destination.s3.avro.JsonFieldNameUpdater; import io.airbyte.integrations.destination.s3.parquet.S3ParquetWriter; import io.airbyte.integrations.destination.s3.util.AvroRecordHelper; @@ -21,12 +22,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3ParquetDestinationAcceptanceTest extends S3DestinationAcceptanceTest { - private final JsonAvroConverter converter = new JsonAvroConverter(); - protected S3ParquetDestinationAcceptanceTest() { super(S3Format.PARQUET); } @@ -62,7 +60,7 @@ protected List retrieveRecords(final TestDestinationEnv testEnv, final ObjectReader jsonReader = MAPPER.reader(); 
GenericData.Record record; while ((record = parquetReader.read()) != null) { - final byte[] jsonBytes = converter.convertToJson(record); + final byte[] jsonBytes = AvroConstants.JSON_CONVERTER.convertToJson(record); JsonNode jsonRecord = jsonReader.readTree(jsonBytes); jsonRecord = nameUpdater.getJsonWithOriginalFieldNames(jsonRecord); jsonRecords.add(AvroRecordHelper.pruneAirbyteJson(jsonRecord)); diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java index d92a90e768de3..772a6110ef20e 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonFieldNameUpdaterTest.java @@ -29,7 +29,6 @@ public void testFieldNameUpdate() throws IOException { final JsonNode original = testCase.get("original"); final JsonNode updated = testCase.get("updated"); - assertEquals(updated, nameUpdater.getJsonWithStandardizedFieldNames(original)); assertEquals(original, nameUpdater.getJsonWithOriginalFieldNames(updated)); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java similarity index 60% rename from airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverterTest.java rename to airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java index 8f5420d11929a..4c7d6eef6a6ba 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java @@ -8,12 +8,16 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectWriter; import com.google.common.collect.Lists; +import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.util.MoreIterators; import java.util.Collections; import java.util.stream.Stream; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.params.ParameterizedTest; @@ -21,7 +25,10 @@ import org.junit.jupiter.params.provider.ArgumentsProvider; import org.junit.jupiter.params.provider.ArgumentsSource; -class JsonToAvroSchemaConverterTest { +class JsonToAvroConverterTest { + + private static final ObjectWriter WRITER = MoreMappers.initMapper().writer(); + private static final JsonToAvroSchemaConverter SCHEMA_CONVERTER = new JsonToAvroSchemaConverter(); @Test public void testGetSingleTypes() { @@ -55,7 +62,7 @@ public static class GetFieldTypeTestCaseProvider implements ArgumentsProvider { @Override public Stream provideArguments(final 
ExtensionContext context) throws Exception { - final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/get_field_type.json")); + final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/type_conversion_test_cases.json")); return MoreIterators.toList(testCases.elements()).stream().map(testCase -> Arguments.of( testCase.get("fieldName").asText(), testCase.get("jsonFieldSchema"), @@ -66,11 +73,10 @@ public Stream provideArguments(final ExtensionContext conte @ParameterizedTest @ArgumentsSource(GetFieldTypeTestCaseProvider.class) - public void testGetFieldType(final String fieldName, final JsonNode jsonFieldSchema, final JsonNode avroFieldType) { - final JsonToAvroSchemaConverter converter = new JsonToAvroSchemaConverter(); + public void testFieldTypeConversion(final String fieldName, final JsonNode jsonFieldSchema, final JsonNode avroFieldType) { assertEquals( avroFieldType, - Jsons.deserialize(converter.getNullableFieldTypes(fieldName, jsonFieldSchema).toString()), + Jsons.deserialize(SCHEMA_CONVERTER.getNullableFieldTypes(fieldName, jsonFieldSchema).toString()), String.format("Test for %s failed", fieldName)); } @@ -78,30 +84,45 @@ public static class GetAvroSchemaTestCaseProvider implements ArgumentsProvider { @Override public Stream provideArguments(final ExtensionContext context) throws Exception { - final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/get_avro_schema.json")); + final JsonNode testCases = Jsons.deserialize(MoreResources.readResource("parquet/json_schema_converter/json_conversion_test_cases.json")); return MoreIterators.toList(testCases.elements()).stream().map(testCase -> Arguments.of( testCase.get("schemaName").asText(), testCase.get("namespace").asText(), testCase.get("appendAirbyteFields").asBoolean(), testCase.get("jsonSchema"), - testCase.get("avroSchema"))); + testCase.get("jsonObject"), + testCase.get("avroSchema"), + testCase.get("avroObject"))); } } + /** + * This test verifies both the schema and object conversion. 
+ */ @ParameterizedTest @ArgumentsSource(GetAvroSchemaTestCaseProvider.class) - public void testGetAvroSchema( - final String schemaName, - final String namespace, - final boolean appendAirbyteFields, - final JsonNode jsonSchema, - final JsonNode avroSchema) { - final JsonToAvroSchemaConverter converter = new JsonToAvroSchemaConverter(); + public void testJsonAvroConversion(final String schemaName, + final String namespace, + final boolean appendAirbyteFields, + final JsonNode jsonSchema, + final JsonNode jsonObject, + final JsonNode avroSchema, + final JsonNode avroObject) throws Exception { + final Schema actualAvroSchema = SCHEMA_CONVERTER.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields); assertEquals( avroSchema, - Jsons.deserialize(converter.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields).toString()), - String.format("Test for %s failed", schemaName)); + Jsons.deserialize(actualAvroSchema.toString()), + String.format("Schema conversion for %s failed", schemaName)); + + final Schema.Parser schemaParser = new Schema.Parser(); + final GenericData.Record actualAvroObject = AvroConstants.JSON_CONVERTER.convertToGenericDataRecord( + WRITER.writeValueAsBytes(jsonObject), + schemaParser.parse(Jsons.serialize(avroSchema))); + assertEquals( + avroObject, + Jsons.deserialize(actualAvroObject.toString()), + String.format("Object conversion for %s failed", schemaName)); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_avro_schema.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json similarity index 51% rename from airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_avro_schema.json rename to airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json index 77a415baa2777..212ec9959fba1 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_avro_schema.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json @@ -11,6 +11,9 @@ } } }, + "jsonObject": { + "node_id": "abc123" + }, "avroSchema": { "type": "record", "name": "simple_schema", @@ -20,8 +23,20 @@ "name": "node_id", "type": ["null", "string"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": null } }, { @@ -47,6 +62,13 @@ } } }, + "jsonObject": { + "node_id": "abc123", + "user": { + "first_name": "charles", + "last_name": "darwin" + } + }, "avroSchema": { "type": "record", "name": "nested_record", @@ -75,13 +97,38 @@ "name": "last_name", "type": ["null", "string"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] } ], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "node_id": "abc123", + "user": { + "first_name": "charles", + "last_name": "darwin", + "_airbyte_additional_properties": null + }, + "_airbyte_additional_properties": null } }, { @@ -96,6 +143,11 @@ } } }, + "jsonObject": { + 
"_airbyte_ab_id": "752fcd83-7e46-41da-b7ff-f05cb070c893", + "_airbyte_emitted_at": 1634982000, + "node_id": "abc123" + }, "avroSchema": { "type": "record", "name": "record_with_airbyte_fields", @@ -119,8 +171,22 @@ "name": "node_id", "type": ["null", "string"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default": null } ] + }, + "avroObject": { + "_airbyte_ab_id": "752fcd83-7e46-41da-b7ff-f05cb070c893", + "_airbyte_emitted_at": 1634982000, + "node_id": "abc123", + "_airbyte_additional_properties": null } }, { @@ -135,6 +201,9 @@ } } }, + "jsonObject": { + "node:id": "abc123" + }, "avroSchema": { "type": "record", "name": "name_with_special_characters", @@ -146,8 +215,20 @@ "doc": "_airbyte_original_name:node:id", "type": ["null", "string"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": null } }, { @@ -162,6 +243,9 @@ } } }, + "jsonObject": { + "identifier": 65536.0 + }, "avroSchema": { "type": "record", "name": "record_with_union_type", @@ -171,8 +255,20 @@ "name": "identifier", "type": ["null", "double", "string"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "identifier": 65536.0, + "_airbyte_additional_properties": null } }, { @@ -190,6 +286,9 @@ } } }, + "jsonObject": { + "identifier": ["151", "152"] + }, "avroSchema": { "type": "record", "name": "array_with_same_type", @@ -205,8 +304,20 @@ } ], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "identifier": ["151", "152"], + "_airbyte_additional_properties": null } }, { @@ -235,6 +346,9 @@ } } }, + "jsonObject": { + "identifiers": ["151", 152, true, false] + }, "avroSchema": { "type": "record", "name": "array_with_union_type", @@ -250,8 +364,20 @@ } ], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "identifiers": ["151", 152, true, false], + "_airbyte_additional_properties": null } }, { @@ -276,6 +402,9 @@ } } }, + "jsonObject": { + "created_at": 1634982000 + }, "avroSchema": { "type": "record", "name": "field_with_combined_restriction", @@ -285,8 +414,20 @@ "name": "created_at", "type": ["null", "string", "int"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "created_at": 1634982000, + "_airbyte_additional_properties": null } }, { @@ -316,6 +457,11 @@ } } }, + "jsonObject": { + "user": { + "created_at": "1634982000" + } + }, "avroSchema": { "type": "record", "name": "record_with_combined_restriction_field", @@ -334,13 +480,36 @@ "name": "created_at", "type": ["null", "string", "int"], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] } ], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "user": { + "created_at": 
"1634982000", + "_airbyte_additional_properties": null + }, + "_airbyte_additional_properties": null } }, { @@ -362,6 +531,9 @@ } } }, + "jsonObject": { + "identifiers": [151, 152, "153", true, false] + }, "avroSchema": { "type": "record", "name": "array_with_combined_restriction_field", @@ -377,8 +549,20 @@ } ], "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "identifiers": [151, 152, "153", true, false], + "_airbyte_additional_properties": null } }, { @@ -388,23 +572,177 @@ "jsonSchema": { "type": "object", "properties": { - "5filed_name": { + "5field_name": { "type": ["null", "string"] } } }, + "jsonObject": { + "_5field_name": "theory of relativity" + }, "avroSchema": { "type": "record", "name": "field_with_bad_first_char", "namespace": "namespace11", "fields": [ { - "name": "_5filed_name", + "name": "_5field_name", + "type": ["null", "string"], + "doc": "_airbyte_original_name:5field_name", + "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null + } + ] + }, + "avroObject": { + "_5field_name": "theory of relativity", + "_airbyte_additional_properties": null + } + }, + { + "schemaName": "record_with_airbyte_additional_properties", + "namespace": "namespace12", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "node_id": { + "type": ["null", "string"] + }, + "_airbyte_additional_properties": { + "type": "object" + } + } + }, + "jsonObject": { + "node_id": "abc123", + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": 1439, + "active": true + } + }, + "avroSchema": { + "type": "record", + "name": "record_with_airbyte_additional_properties", + "namespace": "namespace12", + "fields": [ + { + "name": "node_id", "type": ["null", "string"], - "doc": "_airbyte_original_name:5filed_name", "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null } ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": "1439", + "active": "true" + } + } + }, + { + "schemaName": "record_with_ab_additional_properties", + "namespace": "namespace13", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "node_id": { + "type": ["null", "string"] + }, + "_ab_additional_properties": { + "type": "object" + } + } + }, + "jsonObject": { + "node_id": "abc123", + "_ab_additional_properties": { + "username": "343-guilty-spark", + "password": 1439, + "active": true + } + }, + "avroSchema": { + "type": "record", + "name": "record_with_ab_additional_properties", + "namespace": "namespace13", + "fields": [ + { + "name": "node_id", + "type": ["null", "string"], + "default": null + }, + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null + } + ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": "1439", + "active": "true" + } + } + }, + { + "schemaName": "record_without_properties", + "namespace": "namespace14", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object" + }, + "jsonObject": { + "username": "343-guilty-spark", + "password": 1439, + "active": true + }, + 
"avroSchema": { + "type": "record", + "name": "record_without_properties", + "namespace": "namespace14", + "fields": [ + { + "name":"_airbyte_additional_properties", + "type":[ + "null", + { "type":"map", "values":"string" } + ], + "default":null + } + ] + }, + "avroObject": { + "_airbyte_additional_properties": { + "username": "343-guilty-spark", + "password": "1439", + "active": "true" + } } } ] diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_field_type.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json similarity index 66% rename from airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_field_type.json rename to airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json index 6dd9a503e984b..306d1213ab296 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/get_field_type.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json @@ -99,29 +99,71 @@ "name": "node_id", "type": ["null", "string"], "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + {"type": "map", "values": "string"} + ], + "default": null + } + ] + } + ] + }, + { + "fieldName": "object_field_without_properties", + "jsonFieldSchema": { + "type": "object" + }, + "avroFieldType": [ + "null", + { + "type": "record", + "name": "object_field_without_properties", + "fields": [ + { + "name": "_airbyte_additional_properties", + "type": ["null", {"type": "map", "values": "string"}], + "default": null } ] } ] }, + { + "fieldName": "_airbyte_additional_properties", + "jsonFieldSchema": { + "type": "object" + }, + "avroFieldType": ["null", {"type":"map","values":"string"}] + }, + { + "fieldName": "_ab_additional_properties", + "jsonFieldSchema": { + "type": "object" + }, + "avroFieldType": ["null", {"type":"map","values":"string"}] + }, { "fieldName": "any_of_field", "jsonFieldSchema": { - "anyOf": [{ "type": "string" }, { "type": "integer" }] + "anyOf": [{"type": "string"}, {"type": "integer"}] }, "avroFieldType": ["null", "string", "int"] }, { "fieldName": "all_of_field", "jsonFieldSchema": { - "allOf": [{ "type": "string" }, { "type": "integer" }] + "allOf": [{"type": "string"}, {"type": "integer"}] }, "avroFieldType": ["null", "string", "int"] }, { "fieldName": "one_of_field", "jsonFieldSchema": { - "oneOf": [{ "type": "string" }, { "type": "integer" }] + "oneOf": [{"type": "string"}, {"type": "integer"}] }, "avroFieldType": ["null", "string", "int"] } diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index a4da56377cf34..44cfa4f835ba0 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -220,6 +220,7 @@ * [Technical Stack](understanding-airbyte/tech-stack.md) * [Change Data Capture \(CDC\)](understanding-airbyte/cdc.md) * [Namespaces](understanding-airbyte/namespaces.md) + * [Json to Avro Conversion](understanding-airbyte/json-avro-conversion.md) * [Glossary of Terms](understanding-airbyte/glossary.md) * [API documentation](api-documentation.md) * [Project Overview](project-overview/README.md) diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index b10eb2254db42..761a6eb4c9719 100644 --- a/docs/integrations/destinations/databricks.md +++ 
b/docs/integrations/destinations/databricks.md @@ -89,7 +89,7 @@ Each table will have the following columns: | `_airbyte_emitted_at` | timestamp | Data emission timestamp. | | Data fields from the source stream | various | All fields in the staging Parquet files will be expanded in the table. | -Learn how source data is converted to Parquet and the current limitations [here](https://docs.airbyte.io/integrations/destinations/s3#data-schema). +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ## Getting started @@ -103,6 +103,7 @@ Learn how source data is converted to Parquet and the current limitations [here] | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.2 | 2021-10-25 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | | 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. | diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index d0f4806f40c24..8feba67751575 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -98,157 +98,7 @@ Here is the available compression codecs: #### Data schema -Under the hood, an Airbyte data stream in Json schema is converted to an Avro schema, and then the Json object is converted to an Avro record based on the Avro schema. Because the data stream can come from any data source, the Avro S3 destination connector has the following arbitrary rules. - -1. Json schema types are mapped to Avro typea as follows: - -| Json Data Type | Avro Data Type | -| :---: | :---: | -| string | string | -| number | double | -| integer | int | -| boolean | boolean | -| null | null | -| object | record | -| array | array | - -1. Built-in Json schema formats are not mapped to Avro logical types at this moment. -2. Combined restrictions \("allOf", "anyOf", and "oneOf"\) will be converted to type unions. The corresponding Avro schema can be less stringent. For example, the following Json schema - - ```javascript - { - "oneOf": [ - { "type": "string" }, - { "type": "integer" } - ] - } - ``` - - will become this in Avro schema: - - ```javascript - { - "type": ["null", "string", "int"] - } - ``` - -3. Keyword `not` is not supported, as there is no equivalent validation mechanism in Avro schema. -4. Only alphanumeric characters and underscores \(`/a-zA-Z0-9_/`\) are allowed in a stream or field name. Any special character will be converted to an alphabet or underscore. For example, `spécial:character_names` will become `special_character_names`. The original names will be stored in the `doc` property in this format: `_airbyte_original_name:`. -5. All field will be nullable. For example, a `string` Json field will be typed as `["null", "string"]` in Avro. This is necessary because the incoming data stream may have optional fields. -6. 
For array fields in Json schema, when the `items` property is an array, it means that each element in the array should follow its own schema sequentially. For example, the following specification means the first item in the array should be a string, and the second a number. - - ```javascript - { - "array_field": { - "type": "array", - "items": [ - { "type": "string" }, - { "type": "number" } - ] - } - } - ``` - -This is not supported in Avro schema. As a compromise, the converter creates a union, \["string", "number"\], which is less stringent: - -```javascript - { - "name": "array_field", - "type": [ - "null", - { - "type": "array", - "items": ["null", "string"] - } - ], - "default": null - } -``` - -1. Two Airbyte specific fields will be added to each Avro record: - -| Field | Schema | Document | -| :--- | :--- | :---: | -| `_airbyte_ab_id` | `uuid` | [link](http://avro.apache.org/docs/current/spec.html#UUID) | -| `_airbyte_emitted_at` | `timestamp-millis` | [link](http://avro.apache.org/docs/current/spec.html#Timestamp+%28millisecond+precision%29) | - -1. Currently `additionalProperties` is not supported. This means if the source is schemaless \(e.g. Mongo\), or has flexible fields, they will be ignored. We will have a solution soon. Feel free to submit a new issue if this is blocking for you. - -For example, given the following Json schema: - -```javascript -{ - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "id": { - "type": "integer" - }, - "user": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "field_with_spécial_character": { - "type": "integer" - } - } - }, - "created_at": { - "type": ["null", "string"], - "format": "date-time" - } - } -} -``` - -Its corresponding Avro schema will be: - -```javascript -{ - "name" : "stream_name", - "type" : "record", - "fields" : [ { - "name" : "_airbyte_ab_id", - "type" : { - "type" : "string", - "logicalType" : "uuid" - } - }, { - "name" : "_airbyte_emitted_at", - "type" : { - "type" : "long", - "logicalType" : "timestamp-millis" - } - }, { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "user", - "type" : [ "null", { - "type" : "record", - "name" : "user", - "fields" : [ { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "field_with_special_character", - "type" : [ "null", "int" ], - "doc" : "_airbyte_original_name:field_with_spécial_character", - "default" : null - } ] - } ], - "default" : null - }, { - "name" : "created_at", - "type" : [ "null", "string" ], - "default" : null - } ] -} -``` +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ### CSV @@ -263,7 +113,7 @@ Like most of the other Airbyte destination connectors, usually the output has th For example, given the following json object from a source: -```javascript +```json { "user_id": 123, "name": { @@ -289,7 +139,7 @@ With root level normalization, the output CSV is: [Json Lines](https://jsonlines.org/) is a text format with one JSON per line. 
Each line has a structure as follows: -```javascript +```json { "_airbyte_ab_id": "", "_airbyte_emitted_at": "", @@ -299,7 +149,7 @@ With root level normalization, the output CSV is: For example, given the following two json objects from a source: -```javascript +```json [ { "user_id": 123, @@ -344,7 +194,7 @@ These parameters are related to the `ParquetOutputFormat`. See the [Java doc](ht #### Data schema -Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. See the `Data schema` section from the [Avro output](gcs.md#avro) for rules and limitations. +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ## Getting started @@ -372,7 +222,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.2 | 2021-09-12 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by GCS | | 0.1.1 | 2021-08-26 | [\#5296](https://github.com/airbytehq/airbyte/issues/5296) | Added storing gcsCsvFileLocation property for CSV format. This is used by destination-bigquery \(GCS Staging upload type\) | | 0.1.0 | 2021-07-16 | [\#4329](https://github.com/airbytehq/airbyte/pull/4784) | Initial release. | - diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index ab811c48bf0c8..d85654362606c 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -97,158 +97,7 @@ Here is the available compression codecs: #### Data schema -Under the hood, an Airbyte data stream in Json schema is converted to an Avro schema, and then the Json object is converted to an Avro record based on the Avro schema. Because the data stream can come from any data source, the Avro S3 destination connector has the following arbitrary rules. - -1. Json schema types are mapped to Avro types as follows: - - | Json Data Type | Avro Data Type | - | :---: | :---: | - | string | string | - | number | double | - | integer | int | - | boolean | boolean | - | null | null | - | object | record | - | array | array | - -2. Built-in Json schema formats are not mapped to Avro logical types at this moment. -3. Combined restrictions \("allOf", "anyOf", and "oneOf"\) will be converted to type unions. The corresponding Avro schema can be less stringent. For example, the following Json schema - - ```javascript - { - "oneOf": [ - { "type": "string" }, - { "type": "integer" } - ] - } - ``` - - will become this in Avro schema: - - ```javascript - { - "type": ["null", "string", "int"] - } - ``` - -4. Keyword `not` is not supported, as there is no equivalent validation mechanism in Avro schema. -5. Only alphanumeric characters and underscores \(`/a-zA-Z0-9_/`\) are allowed in a stream or field name. 
Any special character will be converted to an alphabet or underscore. For example, `spécial:character_names` will become `special_character_names`. The original names will be stored in the `doc` property in this format: `_airbyte_original_name:`. -6. The field name cannot start with a number, so an underscore will be added to the field name at the beginning. -7. All field will be nullable. For example, a `string` Json field will be typed as `["null", "string"]` in Avro. This is necessary because the incoming data stream may have optional fields. -8. For array fields in Json schema, when the `items` property is an array, it means that each element in the array should follow its own schema sequentially. For example, the following specification means the first item in the array should be a string, and the second a number. - - ```javascript - { - "array_field": { - "type": "array", - "items": [ - { "type": "string" }, - { "type": "number" } - ] - } - } - ``` - - This is not supported in Avro schema. As a compromise, the converter creates a union, \["string", "number"\], which is less stringent: - - ```javascript - { - "name": "array_field", - "type": [ - "null", - { - "type": "array", - "items": ["null", "string"] - } - ], - "default": null - } - ``` - -9. Two Airbyte specific fields will be added to each Avro record: - - | Field | Schema | Document | - | :--- | :--- | :---: | - | `_airbyte_ab_id` | `uuid` | [link](http://avro.apache.org/docs/current/spec.html#UUID) | - | `_airbyte_emitted_at` | `timestamp-millis` | [link](http://avro.apache.org/docs/current/spec.html#Timestamp+%28millisecond+precision%29) | - -10. Currently `additionalProperties` is not supported. This means if the source is schemaless \(e.g. Mongo\), or has flexible fields, they will be ignored. We will have a solution soon. Feel free to submit a new issue if this is blocking for you. - -For example, given the following Json schema: - -```javascript -{ - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "id": { - "type": "integer" - }, - "user": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "field_with_spécial_character": { - "type": "integer" - } - } - }, - "created_at": { - "type": ["null", "string"], - "format": "date-time" - } - } -} -``` - -Its corresponding Avro schema will be: - -```javascript -{ - "name" : "stream_name", - "type" : "record", - "fields" : [ { - "name" : "_airbyte_ab_id", - "type" : { - "type" : "string", - "logicalType" : "uuid" - } - }, { - "name" : "_airbyte_emitted_at", - "type" : { - "type" : "long", - "logicalType" : "timestamp-millis" - } - }, { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "user", - "type" : [ "null", { - "type" : "record", - "name" : "user", - "fields" : [ { - "name" : "id", - "type" : [ "null", "int" ], - "default" : null - }, { - "name" : "field_with_special_character", - "type" : [ "null", "int" ], - "doc" : "_airbyte_original_name:field_with_spécial_character", - "default" : null - } ] - } ], - "default" : null - }, { - "name" : "created_at", - "type" : [ "null", "string" ], - "default" : null - } ] -} -``` +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. 
Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ### CSV @@ -263,7 +112,7 @@ Like most of the other Airbyte destination connectors, usually the output has th For example, given the following json object from a source: -```javascript +```json { "user_id": 123, "name": { @@ -289,7 +138,7 @@ With root level normalization, the output CSV is: [Json Lines](https://jsonlines.org/) is a text format with one JSON per line. Each line has a structure as follows: -```javascript +```json { "_airbyte_ab_id": "", "_airbyte_emitted_at": "", @@ -299,7 +148,7 @@ With root level normalization, the output CSV is: For example, given the following two json objects from a source: -```javascript +```json [ { "user_id": 123, @@ -344,7 +193,7 @@ These parameters are related to the `ParquetOutputFormat`. See the [Java doc](ht #### Data schema -Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. See the `Data schema` section from the [Avro output](s3.md#avro) for rules and limitations. +Under the hood, an Airbyte data stream in Json schema is first converted to an Avro schema, then the Json object is converted to an Avro record, and finally the Avro record is outputted to the Parquet format. Because the data stream can come from any data source, the Json to Avro conversion process has arbitrary rules and limitations. Learn more about how source data is converted to Avro and the current limitations [here](https://docs.airbyte.io/understanding-airbyte/json-avro-conversion). ## Getting Started \(Airbyte Open-Source / Airbyte Cloud\) @@ -375,6 +224,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | | 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | | 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | | 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | diff --git a/docs/understanding-airbyte/json-avro-conversion.md b/docs/understanding-airbyte/json-avro-conversion.md new file mode 100644 index 0000000000000..7b8887d7cfac5 --- /dev/null +++ b/docs/understanding-airbyte/json-avro-conversion.md @@ -0,0 +1,231 @@ +# Json to Avro Conversion for Blob Storage Destinations + +When an Airbyte data stream is synced to the Avro or Parquet format (e.g. Parquet on S3), the source Json schema is converted to an Avro schema, then the Json object is converted to an Avro record based on the Avro schema (and further to Parquet if necessary). Because the data stream can come from any data source, the Json to Avro conversion process has the following rules and limitations. + +1. 
Json schema types are mapped to Avro types as follows: + + | Json Data Type | Avro Data Type | + | :---: | :---: | + | string | string | + | number | double | + | integer | int | + | boolean | boolean | + | null | null | + | object | record | + | array | array | + +2. Built-in Json schema formats are not mapped to Avro logical types at this moment. +3. Combined restrictions \("allOf", "anyOf", and "oneOf"\) will be converted to type unions. The corresponding Avro schema can be less stringent. For example, the following Json schema + + ```json + { + "oneOf": [ + { "type": "string" }, + { "type": "integer" } + ] + } + ``` + + will become this in Avro schema: + + ```json + { + "type": ["null", "string", "int"] + } + ``` + +4. Keyword `not` is not supported, as there is no equivalent validation mechanism in Avro schema. +5. Only alphanumeric characters and underscores \(`/a-zA-Z0-9_/`\) are allowed in a stream or field name. Any special character will be converted to a letter or an underscore. For example, `spécial:character_names` will become `special_character_names`. The original names will be stored in the `doc` property in this format: `_airbyte_original_name:`. +6. The field name cannot start with a number, so an underscore will be added to the field name at the beginning. +7. All fields will be nullable. For example, a `string` Json field will be typed as `["null", "string"]` in Avro. This is necessary because the incoming data stream may have optional fields. +8. For array fields in Json schema, when the `items` property is an array, it means that each element in the array should follow its own schema sequentially. For example, the following specification means the first item in the array should be a string, and the second a number. + + ```json + { + "array_field": { + "type": "array", + "items": [ + { "type": "string" }, + { "type": "number" } + ] + } + } + ``` + + This is not supported in Avro schema. As a compromise, the converter creates a union, \["string", "number"\], which is less stringent: + + ```json + { + "name": "array_field", + "type": [ + "null", + { + "type": "array", + "items": ["null", "string"] + } + ], + "default": null + } + ``` + +9. Three Airbyte specific fields will be added to each Avro record: + + | Field | Schema | Document | + | :--- | :--- | :---: | + | `_airbyte_ab_id` | `uuid` | [link](http://avro.apache.org/docs/current/spec.html#UUID) | + | `_airbyte_emitted_at` | `timestamp-millis` | [link](http://avro.apache.org/docs/current/spec.html#Timestamp+%28millisecond+precision%29) | + | `_airbyte_additional_properties` | `map` of `string` | See explanation below. | + +10. A Json object can have additional properties of unknown types, which is not compatible with the Avro schema.
To solve this problem during Json to Avro object conversion, we introduce a special field: `_airbyte_additional_properties` typed as a nullable `map` from `string` to `string`: + +```json +{ + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null +} +``` + +For example, given the following Json schema: + +```json +{ + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "username": { + "type": ["null", "string"] + } + } +} +``` + +this Json object + +```json +{ + "username": "admin", + "active": true, + "age": 21, + "auth": { + "auth_type": "ssl", + "api_key": "abcdefg/012345", + "admin": false, + "id": 1000 + } +} +``` + +will be converted to the following Avro object: + +```json +{ + "username": "admin", + "_airbyte_additional_properties": { + "active": "true", + "age": "21", + "auth": "{\"auth_type\":\"ssl\",\"api_key\":\"abcdefg/012345\",\"admin\":false,\"id\":1000}" + } +} +``` + +Note that all fields other than `username` are moved under `_airbyte_additional_properties` as serialized strings, including the original object `auth`. + +11. Based on the above rules, here is an overall example. Given the following Json schema: + +```json +{ + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "id": { + "type": "integer" + }, + "user": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "field_with_spécial_character": { + "type": "integer" + } + } + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + } + } +} +``` + +Its corresponding Avro schema will be: + +```json +{ + "name": "stream_name", + "type": "record", + "fields": [ + { + "name": "_airbyte_ab_id", + "type": { + "type": "string", + "logicalType": "uuid" + } + }, + { + "name": "_airbyte_emitted_at", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "id", + "type": ["null", "int"], + "default": null + }, + { + "name": "user", + "type": [ + "null", + { + "type": "record", + "name": "user", + "fields": [ + { + "name": "id", + "type": ["null", "int"], + "default": null + }, + { + "name": "field_with_special_character", + "type": ["null", "int"], + "doc": "_airbyte_original_name:field_with_spécial_character", + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "created_at", + "type": ["null", "string"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null + } + ] +} + +``` diff --git a/settings.gradle b/settings.gradle index 4d5b7d79afa0a..207c947b3895a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -11,6 +11,12 @@ gradleEnterprise { } } +sourceControl { + gitRepository("https://github.com/airbytehq/json-avro-converter.git") { + producesModule("tech.allegro.schema.json2avro:converter") + } +} + rootProject.name = 'airbyte' // SUB_BUILD is an enum of , PLATFORM, CONNECTORS_BASE. Blank is equivalent to all.
From 670690e852d35705c501d05e6ef2f7b1a57f6944 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Wed, 3 Nov 2021 12:28:02 -0700 Subject: [PATCH 33/83] Bump Airbyte version from 0.30.25-alpha to 0.30.26-alpha (#7599) Co-authored-by: sherifnada Co-authored-by: Sherif Nada --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index ebf0c22c63ae2..96a09cc68d035 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.25-alpha +current_version = 0.30.26-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index afa76ab975b76..9240486d7fb78 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.25-alpha +VERSION=0.30.26-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 5144d2136f9ca..10297a8b2b233 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.26-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.25-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.26-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 3ea4279ef3efb..cad45e1cb133d 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.26-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.25-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.26-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 4ec7dbde8ff68..69473256d506e 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.25-alpha", + "version": "0.30.26-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 56d5644fdbfda..5f56911784bd7 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.25-alpha", + "version": "0.30.26-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 77828fb4fc94d..5a7fde8cc9d9e 100644 --- 
a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.25-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.26-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.25-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.26-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index f41f0b93bd520..e0a8869097fa4 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.25-alpha" +appVersion: "0.30.26-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 7e94fbf812a48..102e167d29020 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.30.26-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.25-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index fe01794f85b92..dd4c76f51509d 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.26-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.26-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.26-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.25-alpha + tag: 0.30.26-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 1891484d5b426..d1757c00d326d 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.25-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.26-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index e404a0ed13369..1a4e3ffea60e9 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.25-alpha +AIRBYTE_VERSION=0.30.26-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 103773a260009..0f6e9c1684cfa 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/scheduler - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/server - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/webapp - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/worker - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index e404a0ed13369..1a4e3ffea60e9 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.25-alpha +AIRBYTE_VERSION=0.30.26-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 56bcecd4507d0..9ef8fb2f7d624 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/scheduler - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/server - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/webapp - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: airbyte/worker - newTag: 0.30.25-alpha + newTag: 0.30.26-alpha - name: temporalio/auto-setup newTag: 1.7.0 From b9e9e589dfefc4d2ce6096876dece7e39bc9623d Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Wed, 3 Nov 2021 13:04:19 -0700 Subject: [PATCH 34/83] Bump Airbyte version from 0.30.26-alpha to 0.30.27-alpha (#7606) Co-authored-by: sherifnada --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 96a09cc68d035..0ddee4521505c 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] 
-current_version = 0.30.26-alpha +current_version = 0.30.27-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 9240486d7fb78..37865db7292d5 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.26-alpha +VERSION=0.30.27-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 10297a8b2b233..3e6c01c25599a 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.26-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.27-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.26-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.27-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index cad45e1cb133d..cd8ce2180c2a6 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.26-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.27-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.26-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.27-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 69473256d506e..f46ebd18eb3f6 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.26-alpha", + "version": "0.30.27-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 5f56911784bd7..0fe0613ccc0dc 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.26-alpha", + "version": "0.30.27-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 5a7fde8cc9d9e..adbdf161edcb9 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.26-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.27-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.26-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.27-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index e0a8869097fa4..a0b4c91c94e75 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.30.26-alpha" +appVersion: "0.30.27-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 102e167d29020..1f6f355087a8c 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.26-alpha` | +| `worker.image.tag` | The airbyte worker image tag. 
Defaults to the chart's AppVersion | `0.30.27-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index dd4c76f51509d..593def5d6fe1b 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.26-alpha + tag: 0.30.27-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.26-alpha + tag: 0.30.27-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.26-alpha + tag: 0.30.27-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.26-alpha + tag: 0.30.27-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index d1757c00d326d..953feb8df7121 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.26-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.27-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 1a4e3ffea60e9..b008ae065197d 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.26-alpha +AIRBYTE_VERSION=0.30.27-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 0f6e9c1684cfa..4dfe2c2ed9e9b 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/scheduler - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/server - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/webapp - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/worker - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 1a4e3ffea60e9..b008ae065197d 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.26-alpha +AIRBYTE_VERSION=0.30.27-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 9ef8fb2f7d624..25a57a2eedbe4 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/scheduler - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/server - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/webapp - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: airbyte/worker - newTag: 0.30.26-alpha + newTag: 0.30.27-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 62086c7a25fb36a7b916b86de1a199d9df1bdd09 Mon Sep 17 00:00:00 2001 From: Jenny Brown <85510829+airbyte-jenny@users.noreply.github.com> Date: Wed, 3 Nov 2021 15:59:12 -0500 Subject: [PATCH 35/83] Refactor EnvConfigs to be used just in main() and simplify LogClientDelegate to use a pojo LogConfiguration instead (#7397) * Move EnvConfigs towards the outer shell of the app, clarifying input parameters along the way. * Merged master and resolved conflicts * Merged master and resolved conflicts * Merged master and resolved conflicts * EnvConfigs refactor to move it out towards the main() methods. * EnvConfigs refactor to move it out towards the main() methods. * EnvConfigs refactor to move it towards main() methods. * Format * Moved EnvConfigs() out towards main() methods. Cleaned up edge cases for testing not depending on EnvConfigs anymore. * Moving EnvConfigs out of tests where possible. * Formatting * Log tests seem to actually use the environment variables in automated builds. 
* Set test back to original structure to rely on env variables * Format * Solved merge issues * PR review cleanups and tweaks * Formatting * Drop class that was no longer needed. * PR fix on unneeded line * Formatting * Removed unused code --- .../main/java/io/airbyte/config/Configs.java | 3 + .../java/io/airbyte/config/EnvConfigs.java | 31 +++-- .../io/airbyte/config/helpers/GcsLogs.java | 2 +- .../config/helpers/LogClientSingleton.java | 103 ++++++++-------- .../config/helpers/LogConfigDelegator.java | 57 --------- .../io/airbyte/config/helpers/LogConfigs.java | 4 +- .../config/helpers/LogConfiguration.java | 75 ++++++++++++ .../airbyte/config/helpers/GcsLogsTest.java | 7 +- .../config/helpers/KubeLoggingConfigTest.java | 8 +- .../helpers/LogClientSingletonTest.java | 11 +- .../io/airbyte/config/helpers/S3LogsTest.java | 9 +- ...cretManagerPersistenceIntegrationTest.java | 4 +- .../main/java/io/airbyte/db/Databases.java | 3 + .../V0_30_22_001__Store_last_sync_state.java | 33 ++++-- ...30_22_001__Store_last_sync_state_test.java | 18 ++- .../airbyte/scheduler/app/JobSubmitter.java | 13 +- .../airbyte/scheduler/app/SchedulerApp.java | 17 ++- .../scheduler/app/JobSubmitterTest.java | 4 +- .../server/ConfigurationApiFactory.java | 33 ++++-- .../java/io/airbyte/server/ServerApp.java | 11 +- .../java/io/airbyte/server/ServerFactory.java | 31 +++-- .../airbyte/server/apis/ConfigurationApi.java | 32 +++-- .../server/converters/JobConverter.java | 30 +++-- .../server/handlers/JobHistoryHandler.java | 11 +- .../airbyte/server/handlers/LogsHandler.java | 13 +- .../server/handlers/SchedulerHandler.java | 39 ++++-- .../io/airbyte/server/RequestLoggerTest.java | 6 +- .../server/apis/ConfigurationApiTest.java | 11 +- .../server/converters/JobConverterTest.java | 10 +- .../handlers/JobHistoryHandlerTest.java | 4 +- .../server/handlers/LogsHandlerTest.java | 9 +- .../server/handlers/SchedulerHandlerTest.java | 6 +- .../java/io/airbyte/workers/WorkerApp.java | 54 +++++++-- .../workers/temporal/SyncWorkflow.java | 111 +++++++++++++++--- .../temporal/TemporalAttemptExecution.java | 60 +++++++--- .../CheckConnectionActivityImpl.java | 28 ++++- .../catalog/DiscoverCatalogActivityImpl.java | 29 ++++- .../temporal/spec/SpecActivityImpl.java | 30 ++++- .../workers/DefaultReplicationWorkerTest.java | 4 +- .../DefaultNormalizationRunnerTest.java | 4 +- .../DefaultAirbyteDestinationTest.java | 4 +- .../airbyte/DefaultAirbyteSourceTest.java | 6 +- .../TemporalAttemptExecutionTest.java | 7 +- 43 files changed, 683 insertions(+), 302 deletions(-) delete mode 100644 airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java create mode 100644 airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index 99b1fc3709159..6d3804ad9b20f 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -5,6 +5,7 @@ package io.airbyte.config; import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.helpers.LogConfigs; import java.nio.file.Path; import java.util.List; import java.util.Map; @@ -96,6 +97,8 @@ public interface Configs { String getMemoryLimit(); // Logging + LogConfigs getLogConfigs(); + String getS3LogBucket(); String getS3LogBucketRegion(); diff --git 
a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index 6fd0a99849341..3950fab5d432b 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -9,6 +9,8 @@ import com.google.common.base.Strings; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.helpers.LogConfiguration; import java.nio.file.Path; import java.util.Arrays; import java.util.HashSet; @@ -93,6 +95,7 @@ public class EnvConfigs implements Configs { public static final String DEFAULT_NETWORK = "host"; private final Function getEnv; + private LogConfiguration logConfiguration; public EnvConfigs() { this(System::getenv); @@ -100,6 +103,14 @@ public EnvConfigs() { EnvConfigs(final Function getEnv) { this.getEnv = getEnv; + this.logConfiguration = new LogConfiguration( + getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET, ""), + getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET_REGION, ""), + getEnvOrDefault(LogClientSingleton.AWS_ACCESS_KEY_ID, ""), + getEnvOrDefault(LogClientSingleton.AWS_SECRET_ACCESS_KEY, ""), + getEnvOrDefault(LogClientSingleton.S3_MINIO_ENDPOINT, ""), + getEnvOrDefault(LogClientSingleton.GCP_STORAGE_BUCKET, ""), + getEnvOrDefault(LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, "")); } @Override @@ -401,37 +412,41 @@ public String getJobsImagePullSecret() { @Override public String getS3LogBucket() { - return getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET, ""); + return logConfiguration.getS3LogBucket(); } @Override public String getS3LogBucketRegion() { - return getEnvOrDefault(LogClientSingleton.S3_LOG_BUCKET_REGION, ""); + return logConfiguration.getS3LogBucketRegion(); } @Override public String getAwsAccessKey() { - return getEnvOrDefault(LogClientSingleton.AWS_ACCESS_KEY_ID, ""); + return logConfiguration.getAwsAccessKey(); } @Override public String getAwsSecretAccessKey() { - return getEnvOrDefault(LogClientSingleton.AWS_SECRET_ACCESS_KEY, ""); + return logConfiguration.getAwsSecretAccessKey(); } @Override public String getS3MinioEndpoint() { - return getEnvOrDefault(LogClientSingleton.S3_MINIO_ENDPOINT, ""); + return logConfiguration.getS3MinioEndpoint(); } @Override public String getGcpStorageBucket() { - return getEnvOrDefault(LogClientSingleton.GCP_STORAGE_BUCKET, ""); + return logConfiguration.getGcpStorageBucket(); } @Override public String getGoogleApplicationCredentials() { - return getEnvOrDefault(LogClientSingleton.GOOGLE_APPLICATION_CREDENTIALS, ""); + return logConfiguration.getGoogleApplicationCredentials(); + } + + public LogConfigs getLogConfigs() { + return logConfiguration; } @Override @@ -445,7 +460,7 @@ public SecretPersistenceType getSecretPersistenceType() { return SecretPersistenceType.valueOf(secretPersistenceStr); } - private String getEnvOrDefault(final String key, final String defaultValue) { + protected String getEnvOrDefault(final String key, final String defaultValue) { return getEnvOrDefault(key, defaultValue, Function.identity(), false); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java index e248982892d28..5a0787366d46a 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java +++ 
b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/GcsLogs.java @@ -135,7 +135,7 @@ public static void main(final String[] args) throws IOException { blob.downloadTo(os); } os.close(); - final var data = new GcsLogs().tailCloudLog(new LogConfigDelegator(new EnvConfigs()), "tail", 6); + final var data = new GcsLogs().tailCloudLog((new EnvConfigs()).getLogConfigs(), "tail", 6); System.out.println(data); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java index 11924e72e31a0..c393017121d39 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogClientSingleton.java @@ -8,7 +8,6 @@ import io.airbyte.commons.io.IOs; import io.airbyte.config.Configs; import io.airbyte.config.Configs.WorkerEnvironment; -import io.airbyte.config.EnvConfigs; import java.io.File; import java.io.IOException; import java.nio.file.Path; @@ -19,11 +18,10 @@ import org.slf4j.LoggerFactory; import org.slf4j.MDC; -// todo (cgardens) - make this an actual singleton so we can write tests and mock the components. /** * Airbyte's logging layer entrypoint. Handles logs written to local disk as well as logs written to * cloud storages. - * + *

* Although the configuration is passed in as {@link Configs}, it is transformed to * {@link LogConfigs} within this class. Beyond this class, all configuration consumption is via the * {@link LogConfigs} interface via the {@link CloudLogs} interface. @@ -31,51 +29,56 @@ public class LogClientSingleton { private static final Logger LOGGER = LoggerFactory.getLogger(LogClientSingleton.class); + private static LogClientSingleton instance; @VisibleForTesting - static final int LOG_TAIL_SIZE = 1000000; + final static int LOG_TAIL_SIZE = 1000000; @VisibleForTesting - static CloudLogs logClient; + CloudLogs logClient; // Any changes to the following values must also be propagated to the log4j2.xml in main/resources. - public static String WORKSPACE_MDC_KEY = "workspace_app_root"; - public static String CLOUD_WORKSPACE_MDC_KEY = "cloud_workspace_app_root"; + public static final String WORKSPACE_MDC_KEY = "workspace_app_root"; + public static final String CLOUD_WORKSPACE_MDC_KEY = "cloud_workspace_app_root"; - public static String JOB_LOG_PATH_MDC_KEY = "job_log_path"; - public static String CLOUD_JOB_LOG_PATH_MDC_KEY = "cloud_job_log_path"; + public static final String JOB_LOG_PATH_MDC_KEY = "job_log_path"; + public static final String CLOUD_JOB_LOG_PATH_MDC_KEY = "cloud_job_log_path"; // S3/Minio - public static String S3_LOG_BUCKET = "S3_LOG_BUCKET"; - public static String S3_LOG_BUCKET_REGION = "S3_LOG_BUCKET_REGION"; - public static String AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"; - public static String AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"; - public static String S3_MINIO_ENDPOINT = "S3_MINIO_ENDPOINT"; + public static final String S3_LOG_BUCKET = "S3_LOG_BUCKET"; + public static final String S3_LOG_BUCKET_REGION = "S3_LOG_BUCKET_REGION"; + public static final String AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"; + public static final String AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"; + public static final String S3_MINIO_ENDPOINT = "S3_MINIO_ENDPOINT"; // GCS - public static String GCP_STORAGE_BUCKET = "GCP_STORAGE_BUCKET"; - public static String GOOGLE_APPLICATION_CREDENTIALS = "GOOGLE_APPLICATION_CREDENTIALS"; + public static final String GCP_STORAGE_BUCKET = "GCP_STORAGE_BUCKET"; + public static final String GOOGLE_APPLICATION_CREDENTIALS = "GOOGLE_APPLICATION_CREDENTIALS"; - public static int DEFAULT_PAGE_SIZE = 1000; - public static String LOG_FILENAME = "logs.log"; - public static String APP_LOGGING_CLOUD_PREFIX = "app-logging"; - public static String JOB_LOGGING_CLOUD_PREFIX = "job-logging"; + public static final int DEFAULT_PAGE_SIZE = 1000; + public static final String LOG_FILENAME = "logs.log"; + public static final String APP_LOGGING_CLOUD_PREFIX = "app-logging"; + public static final String JOB_LOGGING_CLOUD_PREFIX = "job-logging"; - public static Path getServerLogsRoot(final Configs configs) { - return configs.getWorkspaceRoot().resolve("server/logs"); + public static synchronized LogClientSingleton getInstance() { + if (instance == null) { + instance = new LogClientSingleton(); + } + return instance; } - public static Path getSchedulerLogsRoot(final Configs configs) { - return configs.getWorkspaceRoot().resolve("scheduler/logs"); + public Path getServerLogsRoot(final Path workspaceRoot) { + return workspaceRoot.resolve("server/logs"); } - public static File getServerLogFile(final Configs configs) { - final var logPathBase = getServerLogsRoot(configs); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { - return logPathBase.resolve(LOG_FILENAME).toFile(); - } + 
public Path getSchedulerLogsRoot(final Path workspaceRoot) { + return workspaceRoot.resolve("scheduler/logs"); + } - final var logConfigs = new LogConfigDelegator(configs); - final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + logPathBase; + public File getServerLogFile(final Path workspaceRoot, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + if (shouldUseLocalLogs(workerEnvironment)) { + return getServerLogsRoot(workspaceRoot).resolve(LOG_FILENAME).toFile(); + } + final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + getServerLogsRoot(workspaceRoot); try { return logClient.downloadCloudLog(logConfigs, cloudLogPath); } catch (final IOException e) { @@ -83,14 +86,12 @@ public static File getServerLogFile(final Configs configs) { } } - public static File getSchedulerLogFile(final Configs configs) { - final var logPathBase = getSchedulerLogsRoot(configs); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { - return logPathBase.resolve(LOG_FILENAME).toFile(); + public File getSchedulerLogFile(final Path workspaceRoot, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + if (shouldUseLocalLogs(workerEnvironment)) { + return getSchedulerLogsRoot(workspaceRoot).resolve(LOG_FILENAME).toFile(); } - final var logConfigs = new LogConfigDelegator(configs); - final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + logPathBase; + final var cloudLogPath = APP_LOGGING_CLOUD_PREFIX + getSchedulerLogsRoot(workspaceRoot); try { return logClient.downloadCloudLog(logConfigs, cloudLogPath); } catch (final IOException e) { @@ -98,16 +99,15 @@ public static File getSchedulerLogFile(final Configs configs) { } } - public static List getJobLogFile(final Configs configs, final Path logPath) throws IOException { + public List getJobLogFile(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final Path logPath) throws IOException { if (logPath == null || logPath.equals(Path.of(""))) { return Collections.emptyList(); } - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + if (shouldUseLocalLogs(workerEnvironment)) { return IOs.getTail(LOG_TAIL_SIZE, logPath); } - final var logConfigs = new LogConfigDelegator(configs); final var cloudLogPath = JOB_LOGGING_CLOUD_PREFIX + logPath; return logClient.tailCloudLog(logConfigs, cloudLogPath, LOG_TAIL_SIZE); } @@ -116,52 +116,47 @@ public static List getJobLogFile(final Configs configs, final Path logPa * Primarily to clean up logs after testing. Only valid for Kube logs. */ @VisibleForTesting - public static void deleteLogs(final Configs configs, final String logPath) { + public void deleteLogs(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final String logPath) { if (logPath == null || logPath.equals(Path.of(""))) { return; } - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + if (shouldUseLocalLogs(workerEnvironment)) { throw new NotImplementedException("Local log deletes not supported."); } - final var logConfigs = new LogConfigDelegator(configs); final var cloudLogPath = JOB_LOGGING_CLOUD_PREFIX + logPath; logClient.deleteLogs(logConfigs, cloudLogPath); } - public static void setJobMdc(final Path path) { - // setJobMdc is referenced from TemporalAttemptExecution without input parameters, so hard to pass - // this in. 
- final Configs configs = new EnvConfigs(); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + public void setJobMdc(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final Path path) { + if (shouldUseLocalLogs(workerEnvironment)) { LOGGER.debug("Setting docker job mdc"); MDC.put(LogClientSingleton.JOB_LOG_PATH_MDC_KEY, path.resolve(LogClientSingleton.LOG_FILENAME).toString()); } else { LOGGER.debug("Setting kube job mdc"); - final var logConfigs = new LogConfigDelegator(configs); createCloudClientIfNull(logConfigs); MDC.put(LogClientSingleton.CLOUD_JOB_LOG_PATH_MDC_KEY, path.resolve(LogClientSingleton.LOG_FILENAME).toString()); } } - public static void setWorkspaceMdc(final Path path) { - final var configs = new EnvConfigs(); - if (shouldUseLocalLogs(configs.getWorkerEnvironment())) { + public void setWorkspaceMdc(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs, final Path path) { + if (shouldUseLocalLogs(workerEnvironment)) { LOGGER.debug("Setting docker workspace mdc"); MDC.put(LogClientSingleton.WORKSPACE_MDC_KEY, path.toString()); } else { LOGGER.debug("Setting kube workspace mdc"); - final var logConfigs = new LogConfigDelegator(configs); createCloudClientIfNull(logConfigs); MDC.put(LogClientSingleton.CLOUD_WORKSPACE_MDC_KEY, path.toString()); } } + // This method should cease to exist here and become a property on the enum instead + // TODO handle this as part of refactor https://github.com/airbytehq/airbyte/issues/7545 private static boolean shouldUseLocalLogs(final WorkerEnvironment workerEnvironment) { return workerEnvironment.equals(WorkerEnvironment.DOCKER); } - private static void createCloudClientIfNull(final LogConfigs configs) { + private void createCloudClientIfNull(final LogConfigs configs) { if (logClient == null) { logClient = CloudLogs.createCloudLogClient(configs); } diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java deleted file mode 100644 index 18f194d283d18..0000000000000 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigDelegator.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.config.helpers; - -import io.airbyte.config.Configs; - -/** - * Implements {@link LogConfigs} by delegating to a {@link Configs} implementation. Because the - * logging configuration overlaps with other configuration, this delegation is intended to avoid - * multiple configurations existing at once. 
- */ -public class LogConfigDelegator implements LogConfigs { - - private final Configs delegate; - - public LogConfigDelegator(final Configs configs) { - delegate = configs; - } - - @Override - public String getS3LogBucket() { - return delegate.getS3LogBucket(); - } - - @Override - public String getS3LogBucketRegion() { - return delegate.getS3LogBucketRegion(); - } - - @Override - public String getAwsAccessKey() { - return delegate.getAwsAccessKey(); - } - - @Override - public String getAwsSecretAccessKey() { - return delegate.getAwsSecretAccessKey(); - } - - @Override - public String getS3MinioEndpoint() { - return delegate.getS3MinioEndpoint(); - } - - @Override - public String getGcpStorageBucket() { - return delegate.getGcpStorageBucket(); - } - - @Override - public String getGoogleApplicationCredentials() { - return delegate.getGoogleApplicationCredentials(); - } - -} diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java index 5acaadd83bace..345e21a5cd045 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfigs.java @@ -6,8 +6,8 @@ /** * Configuration required to retrieve logs. This is a subset of the methods defined in - * {@link io.airbyte.config.Configs} so actual look up can be delegated in - * {@link LogConfigDelegator}. This prevents conflicting configuration existing at once. + * {@link io.airbyte.config.Configs} so actual look up can be delegated in {@link LogConfiguration}. + * This prevents conflicting configuration existing at once. */ public interface LogConfigs { diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java new file mode 100644 index 0000000000000..85903e2c4100c --- /dev/null +++ b/airbyte-config/models/src/main/java/io/airbyte/config/helpers/LogConfiguration.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.helpers; + +/** + * Implements {@link LogConfigs} with immutable values. Because the logging configuration overlaps + * with other configuration, this delegation is intended to avoid multiple configurations existing + * at once. 
+ */ +public class LogConfiguration implements LogConfigs { + + public final static LogConfiguration EMPTY = new LogConfiguration("", "", "", "", "", "", ""); + + private final String s3LogBucket; + private final String s3LogBucketRegion; + private final String awsAccessKey; + private final String awsSecretAccessKey; + private final String s3MinioEndpoint; + private final String gcpStorageBucket; + private final String googleApplicationCredentials; + + public LogConfiguration(final String s3LogBucket, + final String s3LogBucketRegion, + final String awsAccessKey, + final String awsSecretAccessKey, + final String s3MinioEndpoint, + final String gcpStorageBucket, + final String googleApplicationCredentials) { + this.s3LogBucket = s3LogBucket; + this.s3LogBucketRegion = s3LogBucketRegion; + this.awsAccessKey = awsAccessKey; + this.awsSecretAccessKey = awsSecretAccessKey; + this.s3MinioEndpoint = s3MinioEndpoint; + this.gcpStorageBucket = gcpStorageBucket; + this.googleApplicationCredentials = googleApplicationCredentials; + } + + @Override + public String getS3LogBucket() { + return s3LogBucket; + } + + @Override + public String getS3LogBucketRegion() { + return s3LogBucketRegion; + } + + @Override + public String getAwsAccessKey() { + return awsAccessKey; + } + + @Override + public String getAwsSecretAccessKey() { + return awsSecretAccessKey; + } + + @Override + public String getS3MinioEndpoint() { + return s3MinioEndpoint; + } + + @Override + public String getGcpStorageBucket() { + return gcpStorageBucket; + } + + @Override + public String getGoogleApplicationCredentials() { + return googleApplicationCredentials; + } + +} diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java index f353c166716c0..b59774795442f 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/GcsLogsTest.java @@ -10,6 +10,7 @@ import static org.mockito.Mockito.when; import io.airbyte.config.EnvConfigs; +import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.util.ArrayList; @@ -36,8 +37,7 @@ public void testMissingConfiguration() { */ @Test public void testRetrieveAllLogs() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = GcsLogs.getFile(configs, "paginate", 6); + final File data = GcsLogs.getFile((new EnvConfigs()).getLogConfigs(), "paginate", 6); final var retrieved = new ArrayList(); Files.lines(data.toPath()).forEach(retrieved::add); @@ -56,8 +56,7 @@ public void testRetrieveAllLogs() throws IOException { */ @Test public void testTail() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = new GcsLogs().tailCloudLog(configs, "tail", 6); + final var data = new GcsLogs().tailCloudLog((new EnvConfigs()).getLogConfigs(), "tail", 6); final var expected = List.of("Line 4", "Line 5", "Line 6", "Line 7", "Line 8", "Line 9"); assertEquals(data, expected); diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java index 8d2477852e33b..726d235dcf40e 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java +++ 
b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/KubeLoggingConfigTest.java @@ -31,7 +31,8 @@ public class KubeLoggingConfigTest { public void cleanUpLogs() { if (logPath != null) { try { - LogClientSingleton.deleteLogs(new EnvConfigs(), logPath); + final EnvConfigs envConfigs = new EnvConfigs(); + LogClientSingleton.getInstance().deleteLogs(envConfigs.getWorkerEnvironment(), envConfigs.getLogConfigs(), logPath); } catch (final Exception e) { // Ignore Minio delete error. } @@ -47,9 +48,10 @@ public void cleanUpLogs() { */ @Test public void testLoggingConfiguration() throws IOException, InterruptedException { + final EnvConfigs envConfigs = new EnvConfigs(); final var randPath = Strings.addRandomSuffix("-", "", 5); // This mirrors our Log4j2 set up. See log4j2.xml. - LogClientSingleton.setJobMdc(Path.of(randPath)); + LogClientSingleton.getInstance().setJobMdc(envConfigs.getWorkerEnvironment(), envConfigs.getLogConfigs(), Path.of(randPath)); final var toLog = List.of("line 1", "line 2", "line 3"); for (final String l : toLog) { @@ -64,7 +66,7 @@ public void testLoggingConfiguration() throws IOException, InterruptedException logPath = randPath + "/logs.log/"; // The same env vars that log4j2 uses to determine where to publish to determine how to retrieve the // log file. - final var logs = LogClientSingleton.getJobLogFile(new EnvConfigs(), Path.of(logPath)); + final var logs = LogClientSingleton.getInstance().getJobLogFile(envConfigs.getWorkerEnvironment(), envConfigs.getLogConfigs(), Path.of(logPath)); // Each log line is of the form . Further, there might be // other log lines from the system running. Join all the lines to simplify assertions. final var logsLine = Strings.join(logs, " "); diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java index c1d60e8955e72..2d5de01af56b5 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/LogClientSingletonTest.java @@ -29,25 +29,28 @@ class LogClientSingletonTest { void setup() { configs = mock(Configs.class); mockLogClient = mock(CloudLogs.class); - LogClientSingleton.logClient = mockLogClient; + LogClientSingleton.getInstance().logClient = mockLogClient; } @Test void testGetJobLogFileK8s() throws IOException { when(configs.getWorkerEnvironment()).thenReturn(WorkerEnvironment.KUBERNETES); - assertEquals(Collections.emptyList(), LogClientSingleton.getJobLogFile(configs, Path.of("/job/1"))); + assertEquals(Collections.emptyList(), + LogClientSingleton.getInstance().getJobLogFile(configs.getWorkerEnvironment(), configs.getLogConfigs(), Path.of("/job/1"))); verify(mockLogClient).tailCloudLog(any(), eq("job-logging/job/1"), eq(LogClientSingleton.LOG_TAIL_SIZE)); } @Test void testGetJobLogFileNullPath() throws IOException { - assertEquals(Collections.emptyList(), LogClientSingleton.getJobLogFile(configs, null)); + assertEquals(Collections.emptyList(), + LogClientSingleton.getInstance().getJobLogFile(configs.getWorkerEnvironment(), configs.getLogConfigs(), null)); verifyNoInteractions(mockLogClient); } @Test void testGetJobLogFileEmptyPath() throws IOException { - assertEquals(Collections.emptyList(), LogClientSingleton.getJobLogFile(configs, Path.of(""))); + assertEquals(Collections.emptyList(), + LogClientSingleton.getInstance().getJobLogFile(configs.getWorkerEnvironment(), 
configs.getLogConfigs(), Path.of(""))); verifyNoInteractions(mockLogClient); } diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java index 74959a55be01f..07d47af19fdba 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/helpers/S3LogsTest.java @@ -25,6 +25,8 @@ @Tag("logger-client") public class S3LogsTest { + private static final LogConfigs logConfigs = (new EnvConfigs()).getLogConfigs(); + @Test public void testMissingCredentials() { final var configs = mock(LogConfigs.class); @@ -41,8 +43,7 @@ public void testMissingCredentials() { */ @Test public void testRetrieveAllLogs() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = S3Logs.getFile(configs, "paginate", 6); + final var data = S3Logs.getFile(logConfigs, "paginate", 6); final var retrieved = new ArrayList(); Files.lines(data.toPath()).forEach(retrieved::add); @@ -61,9 +62,7 @@ public void testRetrieveAllLogs() throws IOException { */ @Test public void testTail() throws IOException { - final var configs = new LogConfigDelegator(new EnvConfigs()); - final var data = new S3Logs().tailCloudLog(configs, "tail", 6); - + final var data = new S3Logs().tailCloudLog(logConfigs, "tail", 6); final var expected = List.of("Line 4", "Line 5", "Line 6", "Line 7", "Line 8", "Line 9"); assertEquals(data, expected); } diff --git a/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java b/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java index 21521f6881d64..e4dc563be31fd 100644 --- a/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java +++ b/airbyte-config/persistence/src/test-integration/java/io/airbyte/config/persistence/GoogleSecretManagerPersistenceIntegrationTest.java @@ -9,6 +9,7 @@ import com.google.api.gax.rpc.NotFoundException; import com.google.cloud.secretmanager.v1.SecretName; +import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; import io.airbyte.config.persistence.split_secrets.GoogleSecretManagerPersistence; import io.airbyte.config.persistence.split_secrets.SecretCoordinate; @@ -26,10 +27,10 @@ public class GoogleSecretManagerPersistenceIntegrationTest { private GoogleSecretManagerPersistence persistence; private String baseCoordinate; + private final Configs configs = new EnvConfigs(); @BeforeEach void setUp() { - final var configs = new EnvConfigs(); persistence = GoogleSecretManagerPersistence.getEphemeral( configs.getSecretStoreGcpProjectId(), configs.getSecretStoreGcpCredentials()); @@ -38,7 +39,6 @@ void setUp() { @AfterEach void tearDown() throws IOException { - final var configs = new EnvConfigs(); try (final var client = GoogleSecretManagerPersistence.getSecretManagerServiceClient(configs.getSecretStoreGcpCredentials())) { // try to delete this so we aren't charged for the secret // if this is missed due to some sort of failure the secret will be deleted after the ttl diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java index 776356a264991..323edd576ccb9 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java +++ 
b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java @@ -33,6 +33,9 @@ public static Database createPostgresDatabaseWithRetry(final String username, final String jdbcConnectionString, final Function isDbReady) { Database database = null; + if (jdbcConnectionString == null || jdbcConnectionString.trim().equals("")) { + throw new IllegalArgumentException("Using a null or empty jdbc url will hang database creation; aborting."); + } while (database == null) { LOGGER.warn("Waiting for database to become available..."); diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java index 79567bb34c5c5..ce1e8f1debe69 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state.java @@ -9,7 +9,6 @@ import io.airbyte.commons.jackson.MoreMappers; import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; -import io.airbyte.config.Configs; import io.airbyte.config.EnvConfigs; import io.airbyte.config.StandardSyncState; import io.airbyte.config.State; @@ -51,15 +50,24 @@ public class V0_30_22_001__Store_last_sync_state extends BaseJavaMigration { static final Field COLUMN_CREATED_AT = DSL.field("created_at", SQLDataType.TIMESTAMPWITHTIMEZONE); static final Field COLUMN_UPDATED_AT = DSL.field("updated_at", SQLDataType.TIMESTAMPWITHTIMEZONE); - private final Configs configs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; public V0_30_22_001__Store_last_sync_state() { - this.configs = new EnvConfigs(); + // EnvConfigs left in place for migration purposes as FlyWay prevents injection, but isolated to + // local scope. + final EnvConfigs configs = new EnvConfigs(); + this.databaseUser = configs.getDatabaseUser(); + this.databasePassword = configs.getDatabasePassword(); + this.databaseUrl = configs.getDatabaseUrl(); } @VisibleForTesting - V0_30_22_001__Store_last_sync_state(final Configs configs) { - this.configs = configs; + V0_30_22_001__Store_last_sync_state(final String databaseUser, final String databasePassword, final String databaseUrl) { + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; } @Override @@ -67,7 +75,7 @@ public void migrate(final Context context) throws Exception { LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); final DSLContext ctx = DSL.using(context.getConnection()); - final Optional jobsDatabase = getJobsDatabase(configs); + final Optional jobsDatabase = getJobsDatabase(databaseUser, databasePassword, databaseUrl); if (jobsDatabase.isPresent()) { copyData(ctx, getStandardSyncStates(jobsDatabase.get()), OffsetDateTime.now()); } @@ -100,16 +108,15 @@ static void copyData(final DSLContext ctx, final Set standard * data from the job database). */ @VisibleForTesting - static Optional getJobsDatabase(final Configs configs) { + static Optional getJobsDatabase(final String databaseUser, final String databasePassword, final String databaseUrl) { try { + if (databaseUrl == null || "".equals(databaseUrl.trim())) { + throw new IllegalArgumentException("The databaseUrl cannot be empty."); + } // If the environment variables exist, it means the migration is run in production. 
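The guard added to createPostgresDatabaseWithRetry matters because the loop below it only exits once a Database is obtained, so a null or blank JDBC URL would otherwise wait forever. The migration's getJobsDatabase above applies the same idea: an empty URL raises IllegalArgumentException, and the surrounding try/catch treats that as "running in development, no jobs database to copy from". A self-contained sketch of the same fail-fast check (the helper name is illustrative):

    // Fail fast rather than entering a retry loop that can never succeed.
    // requireNonBlankJdbcUrl("jdbc:postgresql://db:5432/airbyte") returns the url unchanged;
    // requireNonBlankJdbcUrl("") or requireNonBlankJdbcUrl(null) throws immediately.
    static String requireNonBlankJdbcUrl(final String jdbcConnectionString) {
      if (jdbcConnectionString == null || jdbcConnectionString.trim().isEmpty()) {
        throw new IllegalArgumentException("Using a null or empty jdbc url will hang database creation; aborting.");
      }
      return jdbcConnectionString;
    }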
// Connect to the official job database. - final Database jobsDatabase = new JobsDatabaseInstance( - configs.getDatabaseUser(), - configs.getDatabasePassword(), - configs.getDatabaseUrl()) - .getInitialized(); - LOGGER.info("[{}] Connected to jobs database: {}", MIGRATION_NAME, configs.getDatabaseUrl()); + final Database jobsDatabase = new JobsDatabaseInstance(databaseUser, databasePassword, databaseUrl).getInitialized(); + LOGGER.info("[{}] Connected to jobs database: {}", MIGRATION_NAME, databaseUrl); return Optional.of(jobsDatabase); } catch (final IllegalArgumentException e) { // If the environment variables do not exist, it means the migration is run in development. diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java index e7fbd7e371a77..22115a9f45324 100644 --- a/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_30_22_001__Store_last_sync_state_test.java @@ -24,7 +24,6 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; import io.airbyte.config.Configs; -import io.airbyte.config.EnvConfigs; import io.airbyte.config.JobOutput; import io.airbyte.config.JobOutput.OutputType; import io.airbyte.config.StandardSyncOutput; @@ -39,6 +38,7 @@ import java.util.Collections; import java.util.Set; import java.util.UUID; +import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; import org.flywaydb.core.api.configuration.Configuration; import org.flywaydb.core.api.migration.Context; @@ -52,6 +52,7 @@ import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.Timeout; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) class V0_30_22_001__Store_last_sync_state_test extends AbstractConfigsDatabaseTest { @@ -86,6 +87,8 @@ class V0_30_22_001__Store_last_sync_state_test extends AbstractConfigsDatabaseTe private static Database jobDatabase; @BeforeAll + @Timeout(value = 2, + unit = TimeUnit.MINUTES) public static void setupJobDatabase() throws Exception { jobDatabase = new JobsDatabaseInstance( container.getUsername(), @@ -97,8 +100,7 @@ public static void setupJobDatabase() throws Exception { @Test @Order(10) public void testGetJobsDatabase() { - // when there is no database environment variable, the return value is empty - assertTrue(V0_30_22_001__Store_last_sync_state.getJobsDatabase(new EnvConfigs()).isEmpty()); + assertTrue(V0_30_22_001__Store_last_sync_state.getJobsDatabase("", "", "").isEmpty()); // when there is database environment variable, return the database final Configs configs = mock(Configs.class); @@ -106,7 +108,8 @@ public void testGetJobsDatabase() { when(configs.getDatabasePassword()).thenReturn(container.getPassword()); when(configs.getDatabaseUrl()).thenReturn(container.getJdbcUrl()); - assertTrue(V0_30_22_001__Store_last_sync_state.getJobsDatabase(configs).isPresent()); + assertTrue(V0_30_22_001__Store_last_sync_state + .getJobsDatabase(configs.getDatabaseUser(), configs.getDatabasePassword(), configs.getDatabaseUrl()).isPresent()); } @Test @@ -180,12 +183,7 @@ public void testMigration() throws Exception { .where(COLUMN_CONFIG_TYPE.eq(ConfigSchema.STANDARD_SYNC_STATE.name())) .execute()); - final Configs configs 
= mock(Configs.class); - when(configs.getDatabaseUser()).thenReturn(container.getUsername()); - when(configs.getDatabasePassword()).thenReturn(container.getPassword()); - when(configs.getDatabaseUrl()).thenReturn(container.getJdbcUrl()); - - final var migration = new V0_30_22_001__Store_last_sync_state(configs); + final var migration = new V0_30_22_001__Store_last_sync_state(container.getUsername(), container.getPassword(), container.getJdbcUrl()); // this context is a flyway class; only the getConnection method is needed to run the migration final Context context = new Context() { diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java index 839d3dca4a949..c35a56fc57779 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/JobSubmitter.java @@ -8,8 +8,10 @@ import com.google.common.collect.Sets; import io.airbyte.commons.concurrency.LifecycledCallable; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.app.worker_run.TemporalWorkerRunFactory; import io.airbyte.scheduler.app.worker_run.WorkerRun; import io.airbyte.scheduler.models.Job; @@ -35,6 +37,8 @@ public class JobSubmitter implements Runnable { private final TemporalWorkerRunFactory temporalWorkerRunFactory; private final JobTracker jobTracker; private final JobNotifier jobNotifier; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; // See attemptJobSubmit() to understand the need for this Concurrent Set. private final Set runningJobs = Sets.newConcurrentHashSet(); @@ -43,12 +47,16 @@ public JobSubmitter(final ExecutorService threadPool, final JobPersistence persistence, final TemporalWorkerRunFactory temporalWorkerRunFactory, final JobTracker jobTracker, - final JobNotifier jobNotifier) { + final JobNotifier jobNotifier, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this.threadPool = threadPool; this.persistence = persistence; this.temporalWorkerRunFactory = temporalWorkerRunFactory; this.jobTracker = jobTracker; this.jobNotifier = jobNotifier; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; } @Override @@ -101,6 +109,7 @@ synchronized private Consumer attemptJobSubmit() { @VisibleForTesting void submitJob(final Job job) { + final WorkerRun workerRun = temporalWorkerRunFactory.create(job); // we need to know the attempt number before we begin the job lifecycle. thus we state what the // attempt number should be. if it is not, that the lifecycle will fail. 
this should not happen as @@ -114,7 +123,7 @@ void submitJob(final Job job) { final Path logFilePath = workerRun.getJobRoot().resolve(LogClientSingleton.LOG_FILENAME); final long persistedAttemptId = persistence.createAttempt(job.getId(), logFilePath); assertSameIds(attemptNumber, persistedAttemptId); - LogClientSingleton.setJobMdc(workerRun.getJobRoot()); + LogClientSingleton.getInstance().setJobMdc(workerEnvironment, logConfigs, workerRun.getJobRoot()); }) .setOnSuccess(output -> { LOGGER.debug("Job id {} succeeded", job.getId()); diff --git a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java index 5081a5b5f832d..7830b35c0ef9b 100644 --- a/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java +++ b/airbyte-scheduler/app/src/main/java/io/airbyte/scheduler/app/SchedulerApp.java @@ -15,8 +15,10 @@ import io.airbyte.commons.concurrency.GracefulShutdownHandler; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.EnvConfigs; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; @@ -81,6 +83,8 @@ public class SchedulerApp { private final int submitterNumThreads; private final int maxSyncJobAttempts; private final String airbyteVersionOrWarnings; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; public SchedulerApp(final Path workspaceRoot, final JobPersistence jobPersistence, @@ -90,7 +94,9 @@ public SchedulerApp(final Path workspaceRoot, final TemporalClient temporalClient, final Integer submitterNumThreads, final Integer maxSyncJobAttempts, - final String airbyteVersionOrWarnings) { + final String airbyteVersionOrWarnings, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this.workspaceRoot = workspaceRoot; this.jobPersistence = jobPersistence; this.configRepository = configRepository; @@ -100,6 +106,8 @@ public SchedulerApp(final Path workspaceRoot, this.submitterNumThreads = submitterNumThreads; this.maxSyncJobAttempts = maxSyncJobAttempts; this.airbyteVersionOrWarnings = airbyteVersionOrWarnings; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; } public void start() throws IOException { @@ -116,7 +124,7 @@ public void start() throws IOException { jobPersistence, temporalWorkerRunFactory, new JobTracker(configRepository, jobPersistence, trackingClient), - jobNotifier); + jobNotifier, workerEnvironment, logConfigs); final Map mdc = MDC.getCopyOfContextMap(); @@ -187,7 +195,8 @@ public static void main(final String[] args) throws IOException, InterruptedExce final Configs configs = new EnvConfigs(); - LogClientSingleton.setWorkspaceMdc(LogClientSingleton.getSchedulerLogsRoot(configs)); + LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), + LogClientSingleton.getInstance().getSchedulerLogsRoot(configs.getWorkspaceRoot())); final Path workspaceRoot = configs.getWorkspaceRoot(); LOGGER.info("workspaceRoot = " + workspaceRoot); @@ -250,7 +259,7 @@ public static void main(final String[] args) throws IOException, InterruptedExce temporalClient, 
Integer.parseInt(configs.getSubmitterNumThreads()), configs.getMaxSyncJobAttempts(), - configs.getAirbyteVersionOrWarning()) + configs.getAirbyteVersionOrWarning(), configs.getWorkerEnvironment(), configs.getLogConfigs()) .start(); } diff --git a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java index 5a4b5220b004f..4da251262da22 100644 --- a/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java +++ b/airbyte-scheduler/app/src/test/java/io/airbyte/scheduler/app/JobSubmitterTest.java @@ -24,8 +24,10 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.MoreExecutors; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobOutput; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.scheduler.app.worker_run.TemporalWorkerRunFactory; import io.airbyte.scheduler.app.worker_run.WorkerRun; import io.airbyte.scheduler.models.Job; @@ -91,7 +93,7 @@ public void setup() throws IOException { persistence, workerRunFactory, jobTracker, - jobNotifier)); + jobNotifier, WorkerEnvironment.DOCKER, LogConfiguration.EMPTY)); } @Test diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java index cb1a52c61e2ae..115031c7c4d7a 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java @@ -6,7 +6,9 @@ import io.airbyte.analytics.TrackingClient; import io.airbyte.commons.io.FileTtlManager; -import io.airbyte.config.Configs; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.db.Database; @@ -15,6 +17,7 @@ import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.nio.file.Path; import java.util.Map; import org.glassfish.hk2.api.Factory; import org.slf4j.MDC; @@ -27,12 +30,16 @@ public class ConfigurationApiFactory implements Factory { private static ConfigPersistence seed; private static SchedulerJobClient schedulerJobClient; private static CachingSynchronousSchedulerClient synchronousSchedulerClient; - private static Configs configs; private static FileTtlManager archiveTtlManager; private static Map mdc; private static Database configsDatabase; private static Database jobsDatabase; private static TrackingClient trackingClient; + private static WorkerEnvironment workerEnvironment; + private static LogConfigs logConfigs; + private static Path workspaceRoot; + private static String webappUrl; + private static AirbyteVersion airbyteVersion; public static void setValues( final WorkflowServiceStubs temporalService, @@ -41,24 +48,32 @@ public static void setValues( final ConfigPersistence seed, final SchedulerJobClient schedulerJobClient, final CachingSynchronousSchedulerClient synchronousSchedulerClient, - final Configs configs, final FileTtlManager archiveTtlManager, final Map mdc, final Database configsDatabase, final Database jobsDatabase, - final TrackingClient trackingClient) { + final 
TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String webappUrl, + final AirbyteVersion airbyteVersion, + final Path workspaceRoot) { ConfigurationApiFactory.configRepository = configRepository; ConfigurationApiFactory.jobPersistence = jobPersistence; ConfigurationApiFactory.seed = seed; ConfigurationApiFactory.schedulerJobClient = schedulerJobClient; ConfigurationApiFactory.synchronousSchedulerClient = synchronousSchedulerClient; - ConfigurationApiFactory.configs = configs; ConfigurationApiFactory.archiveTtlManager = archiveTtlManager; ConfigurationApiFactory.mdc = mdc; ConfigurationApiFactory.temporalService = temporalService; ConfigurationApiFactory.configsDatabase = configsDatabase; ConfigurationApiFactory.jobsDatabase = jobsDatabase; ConfigurationApiFactory.trackingClient = trackingClient; + ConfigurationApiFactory.workerEnvironment = workerEnvironment; + ConfigurationApiFactory.logConfigs = logConfigs; + ConfigurationApiFactory.workspaceRoot = workspaceRoot; + ConfigurationApiFactory.webappUrl = webappUrl; + ConfigurationApiFactory.airbyteVersion = airbyteVersion; } @Override @@ -71,12 +86,16 @@ public ConfigurationApi provide() { ConfigurationApiFactory.seed, ConfigurationApiFactory.schedulerJobClient, ConfigurationApiFactory.synchronousSchedulerClient, - ConfigurationApiFactory.configs, ConfigurationApiFactory.archiveTtlManager, ConfigurationApiFactory.temporalService, ConfigurationApiFactory.configsDatabase, ConfigurationApiFactory.jobsDatabase, - ConfigurationApiFactory.trackingClient); + ConfigurationApiFactory.trackingClient, + ConfigurationApiFactory.workerEnvironment, + ConfigurationApiFactory.logConfigs, + ConfigurationApiFactory.webappUrl, + ConfigurationApiFactory.airbyteVersion, + ConfigurationApiFactory.workspaceRoot); } @Override diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 3c8b1196d90a5..271c7d6bf8856 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -157,7 +157,8 @@ private static void createWorkspaceIfNoneExists(final ConfigRepository configRep public static ServerRunnable getServer(final ServerFactory apiFactory, final ConfigPersistence seed) throws Exception { final Configs configs = new EnvConfigs(); - LogClientSingleton.setWorkspaceMdc(LogClientSingleton.getServerLogsRoot(configs)); + LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), + LogClientSingleton.getInstance().getServerLogsRoot(configs.getWorkspaceRoot())); LOGGER.info("Creating Staged Resource folder..."); ConfigDumpImporter.initStagedResourceFolder(); @@ -243,8 +244,12 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con seed, configDatabase, jobDatabase, - configs, - trackingClient); + trackingClient, + configs.getWorkerEnvironment(), + configs.getLogConfigs(), + configs.getWebappUrl(), + configs.getAirbyteVersion(), + configs.getWorkspaceRoot()); } else { LOGGER.info("Start serving version mismatch errors. 
Automatic migration either failed or didn't run"); return new VersionMismatchServer(airbyteVersion, airbyteDatabaseVersion.orElseThrow(), PORT); diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java index d58f0fce32d81..28e1f302afcc7 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java @@ -6,7 +6,9 @@ import io.airbyte.analytics.TrackingClient; import io.airbyte.commons.io.FileTtlManager; -import io.airbyte.config.Configs; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.db.Database; @@ -15,6 +17,7 @@ import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.nio.file.Path; import java.util.Set; import java.util.concurrent.TimeUnit; import org.slf4j.MDC; @@ -29,8 +32,12 @@ ServerRunnable create(SchedulerJobClient schedulerJobClient, ConfigPersistence seed, Database configsDatabase, Database jobsDatabase, - Configs configs, - TrackingClient trackingClient); + TrackingClient trackingClient, + WorkerEnvironment workerEnvironment, + LogConfigs logConfigs, + String webappUrl, + AirbyteVersion airbyteVersion, + Path workspaceRoot); class Api implements ServerFactory { @@ -43,8 +50,12 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, final ConfigPersistence seed, final Database configsDatabase, final Database jobsDatabase, - final Configs configs, - final TrackingClient trackingClient) { + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String webappUrl, + final AirbyteVersion airbyteVersion, + final Path workspaceRoot) { // set static values for factory ConfigurationApiFactory.setValues( temporalService, @@ -53,19 +64,23 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, seed, schedulerJobClient, cachingSchedulerClient, - configs, new FileTtlManager(10, TimeUnit.MINUTES, 10), MDC.getCopyOfContextMap(), configsDatabase, jobsDatabase, - trackingClient); + trackingClient, + workerEnvironment, + logConfigs, + webappUrl, + airbyteVersion, + workspaceRoot); // server configurations final Set> componentClasses = Set.of(ConfigurationApi.class); final Set components = Set.of(new CorsFilter(), new ConfigurationApiBinder()); // construct server - return new ServerApp(configs.getAirbyteVersion(), componentClasses, components); + return new ServerApp(airbyteVersion, componentClasses, components); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 259bbcf2de559..62ccdebe87e66 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -81,7 +81,9 @@ import io.airbyte.api.model.WorkspaceReadList; import io.airbyte.api.model.WorkspaceUpdate; import io.airbyte.commons.io.FileTtlManager; -import io.airbyte.config.Configs; +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.Configs.WorkerEnvironment; +import 
io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; @@ -118,6 +120,7 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Map; @javax.ws.rs.Path("/v1") @@ -141,23 +144,33 @@ public class ConfigurationApi implements io.airbyte.api.V1Api { private final OpenApiConfigHandler openApiConfigHandler; private final DbMigrationHandler dbMigrationHandler; private final OAuthHandler oAuthHandler; - private final Configs configs; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final Path workspaceRoot; public ConfigurationApi(final ConfigRepository configRepository, final JobPersistence jobPersistence, final ConfigPersistence seed, final SchedulerJobClient schedulerJobClient, final CachingSynchronousSchedulerClient synchronousSchedulerClient, - final Configs configs, final FileTtlManager archiveTtlManager, final WorkflowServiceStubs temporalService, final Database configsDatabase, final Database jobsDatabase, - final TrackingClient trackingClient) { + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String webappUrl, + final AirbyteVersion airbyteVersion, + final Path workspaceRoot) { + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.workspaceRoot = workspaceRoot; + final SpecFetcher specFetcher = new SpecFetcher(synchronousSchedulerClient); final JsonSchemaValidator schemaValidator = new JsonSchemaValidator(); final JobNotifier jobNotifier = new JobNotifier( - configs.getWebappUrl(), + webappUrl, configRepository, new WorkspaceHelper(configRepository, jobPersistence), trackingClient); @@ -168,7 +181,7 @@ public ConfigurationApi(final ConfigRepository configRepository, jobPersistence, jobNotifier, temporalService, - new OAuthConfigSupplier(configRepository, false, trackingClient)); + new OAuthConfigSupplier(configRepository, false, trackingClient), workerEnvironment, logConfigs); final WorkspaceHelper workspaceHelper = new WorkspaceHelper(configRepository, jobPersistence); sourceDefinitionsHandler = new SourceDefinitionsHandler(configRepository, synchronousSchedulerClient); connectionsHandler = new ConnectionsHandler(configRepository, workspaceHelper, trackingClient); @@ -177,7 +190,7 @@ public ConfigurationApi(final ConfigRepository configRepository, destinationHandler = new DestinationHandler(configRepository, schemaValidator, specFetcher, connectionsHandler); sourceHandler = new SourceHandler(configRepository, schemaValidator, specFetcher, connectionsHandler); workspacesHandler = new WorkspacesHandler(configRepository, connectionsHandler, destinationHandler, sourceHandler); - jobHistoryHandler = new JobHistoryHandler(jobPersistence); + jobHistoryHandler = new JobHistoryHandler(jobPersistence, workerEnvironment, logConfigs); oAuthHandler = new OAuthHandler(configRepository, trackingClient); webBackendConnectionsHandler = new WebBackendConnectionsHandler( connectionsHandler, @@ -190,7 +203,7 @@ public ConfigurationApi(final ConfigRepository configRepository, webBackendDestinationsHandler = new WebBackendDestinationsHandler(destinationHandler, configRepository, trackingClient); healthCheckHandler = new HealthCheckHandler(configRepository); archiveHandler = new ArchiveHandler( - 
configs.getAirbyteVersion(), + airbyteVersion, configRepository, jobPersistence, seed, @@ -201,7 +214,6 @@ public ConfigurationApi(final ConfigRepository configRepository, logsHandler = new LogsHandler(); openApiConfigHandler = new OpenApiConfigHandler(); dbMigrationHandler = new DbMigrationHandler(configsDatabase, jobsDatabase); - this.configs = configs; } // WORKSPACE @@ -578,7 +590,7 @@ public JobInfoRead getJobInfo(final JobIdRequestBody jobIdRequestBody) { @Override public File getLogs(final LogsRequestBody logsRequestBody) { - return execute(() -> logsHandler.getLogs(configs, logsRequestBody)); + return execute(() -> logsHandler.getLogs(workspaceRoot, workerEnvironment, logConfigs, logsRequestBody)); } @Override diff --git a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java index 3202edf11b580..9841a41d16eb8 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java +++ b/airbyte-server/src/main/java/io/airbyte/server/converters/JobConverter.java @@ -15,11 +15,12 @@ import io.airbyte.api.model.LogRead; import io.airbyte.api.model.SynchronousJobRead; import io.airbyte.commons.enums.Enums; -import io.airbyte.config.EnvConfigs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobOutput; import io.airbyte.config.StandardSyncOutput; import io.airbyte.config.StandardSyncSummary; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.client.SynchronousJobMetadata; import io.airbyte.scheduler.client.SynchronousResponse; import io.airbyte.scheduler.models.Attempt; @@ -32,10 +33,18 @@ public class JobConverter { private static final int LOG_TAIL_SIZE = 1000000; - public static JobInfoRead getJobInfoRead(final Job job) { + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + + public JobConverter(final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + } + + public JobInfoRead getJobInfoRead(final Job job) { return new JobInfoRead() .job(getJobWithAttemptsRead(job).getJob()) - .attempts(job.getAttempts().stream().map(JobConverter::getAttemptInfoRead).collect(Collectors.toList())); + .attempts(job.getAttempts().stream().map(attempt -> getAttemptInfoRead(attempt)).collect(Collectors.toList())); } public static JobWithAttemptsRead getJobWithAttemptsRead(final Job job) { @@ -50,10 +59,10 @@ public static JobWithAttemptsRead getJobWithAttemptsRead(final Job job) { .createdAt(job.getCreatedAtInSecond()) .updatedAt(job.getUpdatedAtInSecond()) .status(Enums.convertTo(job.getStatus(), JobStatus.class))) - .attempts(job.getAttempts().stream().map(JobConverter::getAttemptRead).collect(Collectors.toList())); + .attempts(job.getAttempts().stream().map(attempt -> getAttemptRead(attempt)).collect(Collectors.toList())); } - public static AttemptInfoRead getAttemptInfoRead(final Attempt attempt) { + public AttemptInfoRead getAttemptInfoRead(final Attempt attempt) { return new AttemptInfoRead() .attempt(getAttemptRead(attempt)) .logs(getLogRead(attempt.getLogPath())); @@ -78,20 +87,19 @@ public static AttemptRead getAttemptRead(final Attempt attempt) { .endedAt(attempt.getEndedAtInSecond().orElse(null)); } - public static LogRead getLogRead(final Path logPath) { + public LogRead getLogRead(final Path logPath) { try { - final var logs = 
LogClientSingleton.getJobLogFile(new EnvConfigs(), logPath); - return new LogRead().logLines(logs); + return new LogRead().logLines(LogClientSingleton.getInstance().getJobLogFile(workerEnvironment, logConfigs, logPath)); } catch (final IOException e) { throw new RuntimeException(e); } } - public static SynchronousJobRead getSynchronousJobRead(final SynchronousResponse response) { + public SynchronousJobRead getSynchronousJobRead(final SynchronousResponse response) { return getSynchronousJobRead(response.getMetadata()); } - public static SynchronousJobRead getSynchronousJobRead(final SynchronousJobMetadata metadata) { + public SynchronousJobRead getSynchronousJobRead(final SynchronousJobMetadata metadata) { final JobConfigType configType = Enums.convertTo(metadata.getConfigType(), JobConfigType.class); return new SynchronousJobRead() @@ -101,7 +109,7 @@ public static SynchronousJobRead getSynchronousJobRead(final SynchronousJobMetad .createdAt(metadata.getCreatedAt()) .endedAt(metadata.getEndedAt()) .succeeded(metadata.isSucceeded()) - .logs(JobConverter.getLogRead(metadata.getLogPath())); + .logs(getLogRead(metadata.getLogPath())); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java index e1b6128f52240..0282c79490391 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/JobHistoryHandler.java @@ -11,8 +11,10 @@ import io.airbyte.api.model.JobReadList; import io.airbyte.api.model.JobWithAttemptsRead; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.converters.JobConverter; @@ -25,8 +27,10 @@ public class JobHistoryHandler { public static final int DEFAULT_PAGE_SIZE = 200; private final JobPersistence jobPersistence; + private final JobConverter jobConverter; - public JobHistoryHandler(final JobPersistence jobPersistence) { + public JobHistoryHandler(final JobPersistence jobPersistence, final WorkerEnvironment workerEnvironment, final LogConfigs logConfigs) { + jobConverter = new JobConverter(workerEnvironment, logConfigs); this.jobPersistence = jobPersistence; } @@ -47,15 +51,14 @@ public JobReadList listJobsFor(final JobListRequestBody request) throws IOExcept : DEFAULT_PAGE_SIZE, (request.getPagination() != null && request.getPagination().getRowOffset() != null) ? 
request.getPagination().getRowOffset() : 0) .stream() - .map(JobConverter::getJobWithAttemptsRead) + .map(attempt -> jobConverter.getJobWithAttemptsRead(attempt)) .collect(Collectors.toList()); return new JobReadList().jobs(jobReads); } public JobInfoRead getJobInfo(final JobIdRequestBody jobIdRequestBody) throws IOException { final Job job = jobPersistence.getJob(jobIdRequestBody.getId()); - - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java index 57fe0650751ae..5d7a86a2cdd71 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/LogsHandler.java @@ -5,9 +5,11 @@ package io.airbyte.server.handlers; import io.airbyte.api.model.LogsRequestBody; -import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import java.io.File; +import java.nio.file.Path; /** * This handler is only responsible for server and scheduler logs. Jobs logs paths are determined by @@ -15,13 +17,16 @@ */ public class LogsHandler { - public File getLogs(final Configs configs, final LogsRequestBody logsRequestBody) { + public File getLogs(final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final LogsRequestBody logsRequestBody) { switch (logsRequestBody.getLogType()) { case SERVER -> { - return LogClientSingleton.getServerLogFile(configs); + return LogClientSingleton.getInstance().getServerLogFile(workspaceRoot, workerEnvironment, logConfigs); } case SCHEDULER -> { - return LogClientSingleton.getSchedulerLogFile(configs); + return LogClientSingleton.getInstance().getSchedulerLogFile(workspaceRoot, workerEnvironment, logConfigs); } default -> throw new IllegalStateException("Unexpected value: " + logsRequestBody.getLogType()); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java index 2a0a0a8cb468e..378a793282f22 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java @@ -28,6 +28,7 @@ import io.airbyte.api.model.SourceUpdate; import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; @@ -36,6 +37,7 @@ import io.airbyte.config.StandardSync; import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.State; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.protocol.models.AirbyteCatalog; @@ -79,6 +81,9 @@ public class SchedulerHandler { private final JobNotifier jobNotifier; private final WorkflowServiceStubs temporalService; private final OAuthConfigSupplier oAuthConfigSupplier; + private final JobConverter jobConverter; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; public SchedulerHandler(final ConfigRepository 
configRepository, final SchedulerJobClient schedulerJobClient, @@ -86,7 +91,9 @@ public SchedulerHandler(final ConfigRepository configRepository, final JobPersistence jobPersistence, final JobNotifier jobNotifier, final WorkflowServiceStubs temporalService, - final OAuthConfigSupplier oAuthConfigSupplier) { + final OAuthConfigSupplier oAuthConfigSupplier, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this( configRepository, schedulerJobClient, @@ -97,7 +104,9 @@ public SchedulerHandler(final ConfigRepository configRepository, jobPersistence, jobNotifier, temporalService, - oAuthConfigSupplier); + oAuthConfigSupplier, + workerEnvironment, + logConfigs); } @VisibleForTesting @@ -110,7 +119,9 @@ public SchedulerHandler(final ConfigRepository configRepository, final JobPersistence jobPersistence, final JobNotifier jobNotifier, final WorkflowServiceStubs temporalService, - final OAuthConfigSupplier oAuthConfigSupplier) { + final OAuthConfigSupplier oAuthConfigSupplier, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) { this.configRepository = configRepository; this.schedulerJobClient = schedulerJobClient; this.synchronousSchedulerClient = synchronousSchedulerClient; @@ -121,6 +132,9 @@ public SchedulerHandler(final ConfigRepository configRepository, this.jobNotifier = jobNotifier; this.temporalService = temporalService; this.oAuthConfigSupplier = oAuthConfigSupplier; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.jobConverter = new JobConverter(workerEnvironment, logConfigs); } public CheckConnectionRead checkSourceConnectionFromSourceId(final SourceIdRequestBody sourceIdRequestBody) @@ -226,9 +240,9 @@ public SourceDiscoverSchemaRead discoverSchemaForSourceFromSourceCreate(final So return discoverJobToOutput(response); } - private static SourceDiscoverSchemaRead discoverJobToOutput(final SynchronousResponse response) { + private SourceDiscoverSchemaRead discoverJobToOutput(final SynchronousResponse response) { final SourceDiscoverSchemaRead sourceDiscoverSchemaRead = new SourceDiscoverSchemaRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)); + .jobInfo(jobConverter.getSynchronousJobRead(response)); if (response.isSuccess()) { sourceDiscoverSchemaRead.catalog(CatalogConverter.toApi(response.getOutput())); @@ -244,7 +258,7 @@ public SourceDefinitionSpecificationRead getSourceDefinitionSpecification(final final SynchronousResponse response = specFetcher.getSpecJobResponse(source); final ConnectorSpecification spec = response.getOutput(); final SourceDefinitionSpecificationRead specRead = new SourceDefinitionSpecificationRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)) + .jobInfo(jobConverter.getSynchronousJobRead(response)) .connectionSpecification(spec.getConnectionSpecification()) .documentationUrl(spec.getDocumentationUrl().toString()) .sourceDefinitionId(sourceDefinitionId); @@ -257,7 +271,8 @@ public SourceDefinitionSpecificationRead getSourceDefinitionSpecification(final return specRead; } - public DestinationDefinitionSpecificationRead getDestinationSpecification(final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) + public DestinationDefinitionSpecificationRead getDestinationSpecification( + final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) throws ConfigNotFoundException, IOException, JsonValidationException { final UUID destinationDefinitionId = destinationDefinitionIdRequestBody.getDestinationDefinitionId(); final 
StandardDestinationDefinition destination = configRepository.getStandardDestinationDefinition(destinationDefinitionId); @@ -265,7 +280,7 @@ public DestinationDefinitionSpecificationRead getDestinationSpecification(final final ConnectorSpecification spec = response.getOutput(); final DestinationDefinitionSpecificationRead specRead = new DestinationDefinitionSpecificationRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)) + .jobInfo(jobConverter.getSynchronousJobRead(response)) .supportedDestinationSyncModes(Enums.convertListTo(spec.getSupportedDestinationSyncModes(), DestinationSyncMode.class)) .connectionSpecification(spec.getConnectionSpecification()) .documentationUrl(spec.getDocumentationUrl().toString()) @@ -320,7 +335,7 @@ public JobInfoRead syncConnection(final ConnectionIdRequestBody connectionIdRequ destinationImageName, standardSyncOperations); - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdRequestBody) @@ -341,7 +356,7 @@ public JobInfoRead resetConnection(final ConnectionIdRequestBody connectionIdReq final Job job = schedulerJobClient.createOrGetActiveResetConnectionJob(destination, standardSync, destinationImageName, standardSyncOperations); - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } public ConnectionState getState(final ConnectionIdRequestBody connectionIdRequestBody) throws IOException { @@ -366,7 +381,7 @@ public JobInfoRead cancelJob(final JobIdRequestBody jobIdRequestBody) throws IOE final Job job = jobPersistence.getJob(jobId); jobNotifier.failJob("job was cancelled", job); - return JobConverter.getJobInfoRead(job); + return jobConverter.getJobInfoRead(job); } private void cancelTemporalWorkflowIfPresent(final long jobId) throws IOException { @@ -390,7 +405,7 @@ private void cancelTemporalWorkflowIfPresent(final long jobId) throws IOExceptio private CheckConnectionRead reportConnectionStatus(final SynchronousResponse response) { final CheckConnectionRead checkConnectionRead = new CheckConnectionRead() - .jobInfo(JobConverter.getSynchronousJobRead(response)); + .jobInfo(jobConverter.getSynchronousJobRead(response)); if (response.isSuccess()) { checkConnectionRead diff --git a/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java b/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java index 7ac21ccd94f0a..acd6109ac75b4 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/RequestLoggerTest.java @@ -5,7 +5,9 @@ package io.airbyte.server; import io.airbyte.commons.io.IOs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.file.Files; @@ -92,7 +94,9 @@ public void test(final String inputByteBuffer, final String contentType, final i // set up the mdc so that actually log to a file, so that we can verify that file logging captures // threads. final Path jobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(jobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, + LogConfiguration.EMPTY, + jobRoot); // We have to instanciate the logger here, because the MDC config has been changed to log in a // temporary file. 
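In the test hunks above, components that previously constructed EnvConfigs internally are now given an explicit WorkerEnvironment and the LogConfiguration.EMPTY placeholder, so unit tests no longer depend on environment variables. A minimal JUnit-style sketch that exercises the new setJobMdc signature and checks the MDC key it sets, using only constants visible in this patch (the test class name and temp directory prefix are illustrative):

    import static org.junit.jupiter.api.Assertions.assertEquals;

    import io.airbyte.config.Configs.WorkerEnvironment;
    import io.airbyte.config.helpers.LogClientSingleton;
    import io.airbyte.config.helpers.LogConfiguration;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import org.junit.jupiter.api.Test;
    import org.slf4j.MDC;

    class JobMdcSketchTest {

      @Test
      void setsLocalJobLogPathInMdc() throws Exception {
        final Path jobRoot = Files.createTempDirectory("mdc_sketch");

        // DOCKER selects the local-logging branch, so the empty log configuration is sufficient;
        // a KUBERNETES environment would need real cloud log settings instead.
        LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, jobRoot);

        // The docker branch of setJobMdc puts <jobRoot>/<LOG_FILENAME> under JOB_LOG_PATH_MDC_KEY.
        assertEquals(jobRoot.resolve(LogClientSingleton.LOG_FILENAME).toString(),
            MDC.get(LogClientSingleton.JOB_LOG_PATH_MDC_KEY));
      }

    }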
diff --git a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java index cdb7b8c07d030..48e0dc8094570 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java @@ -12,6 +12,8 @@ import io.airbyte.commons.io.FileTtlManager; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.db.Database; @@ -19,6 +21,7 @@ import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.persistence.JobPersistence; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.nio.file.Path; import org.junit.jupiter.api.Test; public class ConfigurationApiTest { @@ -35,12 +38,16 @@ void testImportDefinitions() { mock(ConfigPersistence.class), mock(SchedulerJobClient.class), mock(CachingSynchronousSchedulerClient.class), - configs, mock(FileTtlManager.class), mock(WorkflowServiceStubs.class), mock(Database.class), mock(Database.class), - mock(TrackingClient.class)); + mock(TrackingClient.class), + WorkerEnvironment.DOCKER, + LogConfiguration.EMPTY, + "http://localhost", + new AirbyteVersion("0.1.0-alpha"), + Path.of("")); assertTrue(configurationApi.canImportDefinitons()); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java index 619c43cec3a82..6dcd9e5b3865d 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/converters/JobConverterTest.java @@ -18,8 +18,10 @@ import io.airbyte.api.model.JobWithAttemptsRead; import io.airbyte.api.model.LogRead; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobCheckConnectionConfig; import io.airbyte.config.JobConfig; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; @@ -45,6 +47,7 @@ class JobConverterTest { private static final Path LOG_PATH = Path.of("log_path"); private static final long CREATED_AT = System.currentTimeMillis() / 1000; + private JobConverter jobConverter; private Job job; private static final JobInfoRead JOB_INFO = @@ -71,6 +74,7 @@ class JobConverterTest { @BeforeEach public void setUp() { + jobConverter = new JobConverter(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY); job = mock(Job.class); final Attempt attempt = mock(Attempt.class); when(job.getId()).thenReturn(JOB_ID); @@ -91,17 +95,17 @@ public void setUp() { @Test public void testGetJobInfoRead() { - assertEquals(JOB_INFO, JobConverter.getJobInfoRead(job)); + assertEquals(JOB_INFO, jobConverter.getJobInfoRead(job)); } @Test public void testGetJobWithAttemptsRead() { - assertEquals(JOB_WITH_ATTEMPTS_READ, JobConverter.getJobWithAttemptsRead(job)); + assertEquals(JOB_WITH_ATTEMPTS_READ, jobConverter.getJobWithAttemptsRead(job)); } @Test public void testGetJobRead() { - final JobWithAttemptsRead jobReadActual = JobConverter.getJobWithAttemptsRead(job); + final JobWithAttemptsRead 
jobReadActual = jobConverter.getJobWithAttemptsRead(job); assertEquals(JOB_WITH_ATTEMPTS_READ, jobReadActual); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java index 44d8a102fc0e8..bb3ed756cbf1e 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/JobHistoryHandlerTest.java @@ -22,9 +22,11 @@ import io.airbyte.api.model.LogRead; import io.airbyte.api.model.Pagination; import io.airbyte.commons.enums.Enums; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobCheckConnectionConfig; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.scheduler.models.Attempt; import io.airbyte.scheduler.models.AttemptStatus; import io.airbyte.scheduler.models.Job; @@ -101,7 +103,7 @@ public void setUp() { CREATED_AT); jobPersistence = mock(JobPersistence.class); - jobHistoryHandler = new JobHistoryHandler(jobPersistence); + jobHistoryHandler = new JobHistoryHandler(jobPersistence, WorkerEnvironment.DOCKER, LogConfiguration.EMPTY); } @Nested diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java index e197c454e0c54..425e454273ad8 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/LogsHandlerTest.java @@ -13,6 +13,7 @@ import io.airbyte.config.Configs; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import java.io.File; import java.nio.file.Path; import org.junit.jupiter.api.Test; @@ -24,9 +25,11 @@ public void testServerLogs() { final Configs configs = mock(Configs.class); when(configs.getWorkspaceRoot()).thenReturn(Path.of("/workspace")); when(configs.getWorkerEnvironment()).thenReturn(WorkerEnvironment.DOCKER); + when(configs.getLogConfigs()).thenReturn(LogConfiguration.EMPTY); final File expected = Path.of(String.format("/workspace/server/logs/%s", LogClientSingleton.LOG_FILENAME)).toFile(); - final File actual = new LogsHandler().getLogs(configs, new LogsRequestBody().logType(LogType.SERVER)); + final File actual = new LogsHandler().getLogs(configs.getWorkspaceRoot(), configs.getWorkerEnvironment(), + configs.getLogConfigs(), new LogsRequestBody().logType(LogType.SERVER)); assertEquals(expected, actual); } @@ -36,9 +39,11 @@ public void testSchedulerLogs() { final Configs configs = mock(Configs.class); when(configs.getWorkspaceRoot()).thenReturn(Path.of("/workspace")); when(configs.getWorkerEnvironment()).thenReturn(WorkerEnvironment.DOCKER); + when(configs.getLogConfigs()).thenReturn(LogConfiguration.EMPTY); final File expected = Path.of(String.format("/workspace/scheduler/logs/%s", LogClientSingleton.LOG_FILENAME)).toFile(); - final File actual = new LogsHandler().getLogs(configs, new LogsRequestBody().logType(LogType.SCHEDULER)); + final File actual = new LogsHandler().getLogs(configs.getWorkspaceRoot(), configs.getWorkerEnvironment(), + configs.getLogConfigs(), new LogsRequestBody().logType(LogType.SCHEDULER)); assertEquals(expected, actual); } diff --git 
a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java index ca82d1c17d380..0eabedfff9b3a 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java @@ -35,6 +35,7 @@ import io.airbyte.commons.enums.Enums; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; @@ -48,6 +49,7 @@ import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.StandardSyncOperation.OperatorType; import io.airbyte.config.State; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.protocol.models.AirbyteCatalog; @@ -153,7 +155,9 @@ void setup() { jobPersistence, jobNotifier, mock(WorkflowServiceStubs.class), - mock(OAuthConfigSupplier.class)); + mock(OAuthConfigSupplier.class), + WorkerEnvironment.DOCKER, + LogConfiguration.EMPTY); } @Test diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index 8cfbd7ecb4410..1e9ee445d51f2 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ -9,6 +9,7 @@ import io.airbyte.config.EnvConfigs; import io.airbyte.config.MaxWorkersConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; @@ -57,24 +58,40 @@ public class WorkerApp { private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; private final WorkflowServiceStubs temporalService; + private final ConfigRepository configRepository; private final MaxWorkersConfig maxWorkers; private final WorkerEnvironment workerEnvironment; - private final ConfigRepository configRepository; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; public WorkerApp(final Path workspaceRoot, final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final WorkflowServiceStubs temporalService, final MaxWorkersConfig maxWorkers, + final ConfigRepository configRepository, final WorkerEnvironment workerEnvironment, - final ConfigRepository configRepository) { + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this.workspaceRoot = workspaceRoot; this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.temporalService = temporalService; this.maxWorkers = maxWorkers; - this.workerEnvironment = workerEnvironment; this.configRepository = configRepository; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + 
this.airbyteVersion = airbyteVersion; } public void start() { @@ -93,25 +110,34 @@ public void start() { final Worker specWorker = factory.newWorker(TemporalJobType.GET_SPEC.name(), getWorkerOptions(maxWorkers.getMaxSpecWorkers())); specWorker.registerWorkflowImplementationTypes(SpecWorkflowImpl.class); - specWorker.registerActivitiesImplementations(new SpecActivityImpl(processFactory, workspaceRoot)); + specWorker.registerActivitiesImplementations( + new SpecActivityImpl(processFactory, workspaceRoot, workerEnvironment, logConfigs, databaseUser, databasePassword, databaseUrl, + airbyteVersion)); final Worker checkConnectionWorker = factory.newWorker(TemporalJobType.CHECK_CONNECTION.name(), getWorkerOptions(maxWorkers.getMaxCheckWorkers())); checkConnectionWorker.registerWorkflowImplementationTypes(CheckConnectionWorkflowImpl.class); checkConnectionWorker - .registerActivitiesImplementations(new CheckConnectionActivityImpl(processFactory, secretsHydrator, workspaceRoot)); + .registerActivitiesImplementations( + new CheckConnectionActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion)); final Worker discoverWorker = factory.newWorker(TemporalJobType.DISCOVER_SCHEMA.name(), getWorkerOptions(maxWorkers.getMaxDiscoverWorkers())); discoverWorker.registerWorkflowImplementationTypes(DiscoverCatalogWorkflowImpl.class); discoverWorker - .registerActivitiesImplementations(new DiscoverCatalogActivityImpl(processFactory, secretsHydrator, workspaceRoot)); + .registerActivitiesImplementations( + new DiscoverCatalogActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion)); final Worker syncWorker = factory.newWorker(TemporalJobType.SYNC.name(), getWorkerOptions(maxWorkers.getMaxSyncWorkers())); syncWorker.registerWorkflowImplementationTypes(SyncWorkflow.WorkflowImpl.class); syncWorker.registerActivitiesImplementations( - new SyncWorkflow.ReplicationActivityImpl(processFactory, secretsHydrator, workspaceRoot), - new SyncWorkflow.NormalizationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment), - new SyncWorkflow.DbtTransformationActivityImpl(processFactory, secretsHydrator, workspaceRoot), + new SyncWorkflow.ReplicationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion), + new SyncWorkflow.NormalizationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion), + new SyncWorkflow.DbtTransformationActivityImpl(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion), new SyncWorkflow.PersistStateActivityImpl(workspaceRoot, configRepository)); factory.start(); } @@ -142,7 +168,8 @@ private static WorkerOptions getWorkerOptions(final int max) { public static void main(final String[] args) throws IOException, InterruptedException { final Configs configs = new EnvConfigs(); - LogClientSingleton.setWorkspaceMdc(LogClientSingleton.getSchedulerLogsRoot(configs)); + LogClientSingleton.getInstance().setWorkspaceMdc(configs.getWorkerEnvironment(), configs.getLogConfigs(), + LogClientSingleton.getInstance().getSchedulerLogsRoot(configs.getWorkspaceRoot())); final Path workspaceRoot = configs.getWorkspaceRoot(); 
LOGGER.info("workspaceRoot = " + workspaceRoot); @@ -172,8 +199,13 @@ public static void main(final String[] args) throws IOException, InterruptedExce secretsHydrator, temporalService, configs.getMaxWorkers(), + configRepository, configs.getWorkerEnvironment(), - configRepository).start(); + configs.getLogConfigs(), + configs.getDatabaseUser(), + configs.getDatabasePassword(), + configs.getDatabaseUrl(), + configs.getAirbyteVersionOrWarning()).start(); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java index c2ccdc20dd310..4f27c1d35e1a4 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/SyncWorkflow.java @@ -21,6 +21,7 @@ import io.airbyte.config.StandardSyncOutput; import io.airbyte.config.StandardSyncSummary; import io.airbyte.config.State; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.scheduler.models.IntegrationLauncherConfig; @@ -156,20 +157,49 @@ class ReplicationActivityImpl implements ReplicationActivity { private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; - - public ReplicationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot) { - this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator()); + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; + + public ReplicationActivityImpl( + final ProcessFactory processFactory, + final SecretsHydrator secretsHydrator, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this(processFactory, secretsHydrator, workspaceRoot, workerEnvironment, logConfigs, new AirbyteConfigValidator(), databaseUser, + databasePassword, databaseUrl, airbyteVersion); } @VisibleForTesting ReplicationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, - final AirbyteConfigValidator validator) { + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final AirbyteConfigValidator validator, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = validator; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -191,11 +221,11 @@ public StandardSyncOutput replicate(final JobRunConfig jobRunConfig, }; final TemporalAttemptExecution temporalAttempt = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(sourceLauncherConfig, destinationLauncherConfig, 
jobRunConfig, syncInput), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); final ReplicationOutput attemptOutput = temporalAttempt.get(); final StandardSyncOutput standardSyncOutput = reduceReplicationOutput(attemptOutput); @@ -280,12 +310,23 @@ class NormalizationActivityImpl implements NormalizationActivity { private final Path workspaceRoot; private final AirbyteConfigValidator validator; private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; public NormalizationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, - final WorkerEnvironment workerEnvironment) { - this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment); + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfig, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment, logConfig, databaseUser, databasePassword, + databaseUrl, airbyteVersion); } @VisibleForTesting @@ -293,12 +334,22 @@ public NormalizationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, final AirbyteConfigValidator validator, - final WorkerEnvironment workerEnvironment) { + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = validator; this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -315,11 +366,11 @@ public Void normalize(final JobRunConfig jobRunConfig, }; final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(destinationLauncherConfig, jobRunConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } @@ -356,23 +407,47 @@ class DbtTransformationActivityImpl implements DbtTransformationActivity { private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; private final AirbyteConfigValidator validator; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public DbtTransformationActivityImpl( - final ProcessFactory processFactory, + public DbtTransformationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, - final Path workspaceRoot) { - this(processFactory, 
secretsHydrator, workspaceRoot, new AirbyteConfigValidator()); + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { + this(processFactory, secretsHydrator, workspaceRoot, new AirbyteConfigValidator(), workerEnvironment, logConfigs, databaseUser, + databasePassword, databaseUrl, airbyteVersion); } @VisibleForTesting DbtTransformationActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot, - final AirbyteConfigValidator validator) { + final AirbyteConfigValidator validator, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; this.validator = validator; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -390,11 +465,11 @@ public Void run(final JobRunConfig jobRunConfig, }; final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(destinationLauncherConfig, jobRunConfig, resourceRequirements), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java index d3ec246d936cb..c3021e8960181 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalAttemptExecution.java @@ -6,9 +6,9 @@ import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.functional.CheckedSupplier; -import io.airbyte.config.Configs; -import io.airbyte.config.EnvConfigs; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.db.Database; import io.airbyte.db.instance.jobs.JobsDatabaseInstance; import io.airbyte.scheduler.models.JobRunConfig; @@ -43,47 +43,69 @@ public class TemporalAttemptExecution implements Supplier private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10); private final JobRunConfig jobRunConfig; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; private final Path jobRoot; private final CheckedSupplier, Exception> workerSupplier; private final Supplier inputSupplier; private final Consumer mdcSetter; private final CancellationHandler cancellationHandler; private final Supplier workflowIdProvider; - private final Configs configs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; public TemporalAttemptExecution(final Path workspaceRoot, + final 
WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerSupplier, final Supplier inputSupplier, - final CancellationHandler cancellationHandler) { + final CancellationHandler cancellationHandler, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, workerSupplier, inputSupplier, - LogClientSingleton::setJobMdc, - cancellationHandler, - () -> Activity.getExecutionContext().getInfo().getWorkflowId(), - new EnvConfigs()); + (path -> LogClientSingleton.getInstance().setJobMdc(workerEnvironment, logConfigs, path)), + cancellationHandler, databaseUser, databasePassword, databaseUrl, + () -> Activity.getExecutionContext().getInfo().getWorkflowId(), airbyteVersion); } @VisibleForTesting TemporalAttemptExecution(final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, final JobRunConfig jobRunConfig, final CheckedSupplier, Exception> workerSupplier, final Supplier inputSupplier, final Consumer mdcSetter, final CancellationHandler cancellationHandler, + final String databaseUser, + final String databasePassword, + final String databaseUrl, final Supplier workflowIdProvider, - final Configs configs) { + final String airbyteVersion) { this.jobRunConfig = jobRunConfig; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.jobRoot = WorkerUtils.getJobRoot(workspaceRoot, jobRunConfig.getJobId(), jobRunConfig.getAttemptId()); this.workerSupplier = workerSupplier; this.inputSupplier = inputSupplier; this.mdcSetter = mdcSetter; this.cancellationHandler = cancellationHandler; this.workflowIdProvider = workflowIdProvider; - this.configs = configs; + + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } @Override @@ -91,10 +113,10 @@ public OUTPUT get() { try { mdcSetter.accept(jobRoot); - LOGGER.info("Executing worker wrapper. Airbyte version: {}", new EnvConfigs().getAirbyteVersionOrWarning()); + LOGGER.info("Executing worker wrapper. Airbyte version: {}", airbyteVersion); // TODO(Davin): This will eventually run into scaling problems, since it opens a DB connection per // workflow. See https://github.com/airbytehq/airbyte/issues/5936. - saveWorkflowIdForCancellation(); + saveWorkflowIdForCancellation(databaseUser, databasePassword, databaseUrl); final Worker worker = workerSupplier.get(); final CompletableFuture outputFuture = new CompletableFuture<>(); @@ -120,16 +142,16 @@ public OUTPUT get() { } } - private void saveWorkflowIdForCancellation() throws IOException { + private void saveWorkflowIdForCancellation(final String databaseUser, final String databasePassword, final String databaseUrl) throws IOException { // If the jobId is not a number, it means the job is a synchronous job. No attempt is created for // it, and it cannot be cancelled, so do not save the workflowId. See // SynchronousSchedulerClient.java // for info. 
if (NumberUtils.isCreatable(jobRunConfig.getJobId())) { final Database jobDatabase = new JobsDatabaseInstance( - configs.getDatabaseUser(), - configs.getDatabasePassword(), - configs.getDatabaseUrl()) + databaseUser, + databasePassword, + databaseUrl) .getInitialized(); final JobPersistence jobPersistence = new DefaultJobPersistence(jobDatabase); final String workflowId = workflowIdProvider.get(); @@ -156,10 +178,10 @@ private Thread getWorkerThread(final Worker worker, final Complet * requests are routed to the Temporal Scheduler via the cancelJob function in * SchedulerHandler.java. This manifests as a {@link io.temporal.client.ActivityCompletionException} * when the {@link CancellationHandler} heartbeats to the Temporal Scheduler. - * + *
<p>
* The callback defined in this function is executed after the above exception is caught, and * defines the clean up operations executed as part of cancel. - * + *
<p>
* See {@link CancellationHandler} for more info. */ private Runnable getCancellationChecker(final Worker worker, diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java index 438b2faad77bd..7b1730e9e22b4 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java @@ -6,8 +6,10 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.functional.CheckedSupplier; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.StandardCheckConnectionInput; import io.airbyte.config.StandardCheckConnectionOutput; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; @@ -27,11 +29,31 @@ public class CheckConnectionActivityImpl implements CheckConnectionActivity { private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public CheckConnectionActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot) { + public CheckConnectionActivityImpl(final ProcessFactory processFactory, + final SecretsHydrator secretsHydrator, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } public StandardCheckConnectionOutput run(final JobRunConfig jobRunConfig, @@ -47,11 +69,11 @@ public StandardCheckConnectionOutput run(final JobRunConfig jobRunConfig, final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( - workspaceRoot, + workspaceRoot, workerEnvironment, logConfigs, jobRunConfig, getWorkerFactory(launcherConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java index 9b972a2f3426d..d65d14b7fbb40 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java @@ -6,7 +6,9 @@ import 
com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.functional.CheckedSupplier; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.StandardDiscoverCatalogInput; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.scheduler.models.IntegrationLauncherConfig; @@ -29,11 +31,32 @@ public class DiscoverCatalogActivityImpl implements DiscoverCatalogActivity { private final ProcessFactory processFactory; private final SecretsHydrator secretsHydrator; private final Path workspaceRoot; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public DiscoverCatalogActivityImpl(final ProcessFactory processFactory, final SecretsHydrator secretsHydrator, final Path workspaceRoot) { + public DiscoverCatalogActivityImpl(final ProcessFactory processFactory, + final SecretsHydrator secretsHydrator, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.secretsHydrator = secretsHydrator; this.workspaceRoot = workspaceRoot; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; + } public AirbyteCatalog run(final JobRunConfig jobRunConfig, @@ -49,10 +72,12 @@ public AirbyteCatalog run(final JobRunConfig jobRunConfig, final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( workspaceRoot, + workerEnvironment, + logConfigs, jobRunConfig, getWorkerFactory(launcherConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java index 2cae77228900d..159a15bb1bac7 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java @@ -5,7 +5,9 @@ package io.airbyte.workers.temporal.spec; import io.airbyte.commons.functional.CheckedSupplier; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.JobGetSpecConfig; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; @@ -24,10 +26,29 @@ public class SpecActivityImpl implements SpecActivity { private final ProcessFactory processFactory; private final Path workspaceRoot; + private final WorkerEnvironment workerEnvironment; + private final LogConfigs logConfigs; + private final String databaseUser; + private final String databasePassword; + private final String databaseUrl; + private final String airbyteVersion; - public SpecActivityImpl(final 
ProcessFactory processFactory, final Path workspaceRoot) { + public SpecActivityImpl(final ProcessFactory processFactory, + final Path workspaceRoot, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs, + final String databaseUser, + final String databasePassword, + final String databaseUrl, + final String airbyteVersion) { this.processFactory = processFactory; this.workspaceRoot = workspaceRoot; + this.workerEnvironment = workerEnvironment; + this.logConfigs = logConfigs; + this.databaseUser = databaseUser; + this.databasePassword = databasePassword; + this.databaseUrl = databaseUrl; + this.airbyteVersion = airbyteVersion; } public ConnectorSpecification run(final JobRunConfig jobRunConfig, final IntegrationLauncherConfig launcherConfig) { @@ -35,15 +56,18 @@ public ConnectorSpecification run(final JobRunConfig jobRunConfig, final Integra final TemporalAttemptExecution temporalAttemptExecution = new TemporalAttemptExecution<>( workspaceRoot, + workerEnvironment, + logConfigs, jobRunConfig, getWorkerFactory(launcherConfig), inputSupplier, - new CancellationHandler.TemporalCancellationHandler()); + new CancellationHandler.TemporalCancellationHandler(), databaseUser, databasePassword, databaseUrl, airbyteVersion); return temporalAttemptExecution.get(); } - private CheckedSupplier, Exception> getWorkerFactory(final IntegrationLauncherConfig launcherConfig) { + private CheckedSupplier, Exception> getWorkerFactory( + final IntegrationLauncherConfig launcherConfig) { return () -> { final IntegrationLauncher integrationLauncher = new AirbyteIntegrationLauncher( launcherConfig.getJobId(), diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java index 23d1a358cd65b..9fdba8f1833f9 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/DefaultReplicationWorkerTest.java @@ -22,6 +22,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; import io.airbyte.config.ConfigSchema; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.ReplicationAttemptSummary; import io.airbyte.config.ReplicationOutput; import io.airbyte.config.StandardSync; @@ -31,6 +32,7 @@ import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.validation.json.JsonSchemaValidator; import io.airbyte.workers.protocols.airbyte.AirbyteDestination; @@ -137,7 +139,7 @@ void testLoggingInThreads() throws IOException, WorkerException { // set up the mdc so that actually log to a file, so that we can verify that file logging captures // threads. 
final Path jobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(jobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, jobRoot); final ReplicationWorker worker = new DefaultReplicationWorker( JOB_ID, diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java index 554f91ad90a80..5bfc29630d14a 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/normalization/DefaultNormalizationRunnerTest.java @@ -17,7 +17,9 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.workers.WorkerConstants; import io.airbyte.workers.WorkerException; @@ -45,7 +47,7 @@ class DefaultNormalizationRunnerTest { static { try { logJobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } catch (final IOException e) { e.printStackTrace(); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java index 52d0b958fa7e5..4e4af48ae9c50 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestinationTest.java @@ -18,8 +18,10 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.workers.TestConfigHelpers; import io.airbyte.workers.WorkerConstants; @@ -60,7 +62,7 @@ class DefaultAirbyteDestinationTest { static { try { logJobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } catch (final IOException e) { e.printStackTrace(); } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java index 412f4ec3c3963..3d4bd7ae13fc6 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSourceTest.java @@ -20,9 +20,11 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.config.Configs.WorkerEnvironment; import 
io.airbyte.config.State; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfiguration; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -76,7 +78,7 @@ class DefaultAirbyteSourceTest { static { try { logJobRoot = Files.createTempDirectory(Path.of("/tmp"), "mdc_test"); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } catch (final IOException e) { e.printStackTrace(); } @@ -110,7 +112,7 @@ public void setup() throws IOException, WorkerException { streamFactory = noop -> MESSAGES.stream(); - LogClientSingleton.setJobMdc(logJobRoot); + LogClientSingleton.getInstance().setJobMdc(WorkerEnvironment.DOCKER, LogConfiguration.EMPTY, logJobRoot); } @AfterEach diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java index de7e5827a2e8d..6a6c5e2019828 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalAttemptExecutionTest.java @@ -73,12 +73,15 @@ void setup() throws IOException { attemptExecution = new TemporalAttemptExecution<>( workspaceRoot, + configs.getWorkerEnvironment(), configs.getLogConfigs(), JOB_RUN_CONFIG, execution, () -> "", mdcSetter, mock(CancellationHandler.class), - () -> "workflow_id", - configs); + SOURCE_USERNAME, + SOURCE_PASSWORD, + container.getJdbcUrl(), + () -> "workflow_id", configs.getAirbyteVersionOrWarning()); } @AfterAll From 89267fedc8cfb64a93ab265e66c678214333e5d7 Mon Sep 17 00:00:00 2001 From: Vadym Date: Wed, 3 Nov 2021 23:12:24 +0200 Subject: [PATCH 36/83] Source File Secure: Refactor to use the CDK (#7450) * Remove base-python references * Fix requirements.txt * Update connector to CDK * Bump docker version --- .../connectors/source-file-secure/Dockerfile | 17 ++++++++--------- .../acceptance-test-config.yml | 16 +++++++--------- .../connectors/source-file-secure/build.gradle | 5 ----- .../{https_config.json => config.json} | 0 ...tps_catalog.json => configured_catalog.json} | 0 .../integration_tests/invalid_config.json | 2 +- .../source-file-secure/{main_dev.py => main.py} | 2 +- .../source-file-secure/requirements.txt | 5 ++--- .../connectors/source-file-secure/setup.py | 5 ++--- .../source_file_secure/source.py | 6 +++--- .../source-file-secure/unit_tests/unit_test.py | 2 +- 11 files changed, 25 insertions(+), 35 deletions(-) rename airbyte-integrations/connectors/source-file-secure/integration_tests/{https_config.json => config.json} (100%) rename airbyte-integrations/connectors/source-file-secure/integration_tests/{configured_https_catalog.json => configured_catalog.json} (100%) rename airbyte-integrations/connectors/source-file-secure/{main_dev.py => main.py} (82%) diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index a0187b0ffcc6d..510bb42dc9010 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -1,14 +1,13 @@ -FROM airbyte/source-file:0.2.6 +FROM airbyte/source-file:0.2.7 WORKDIR /airbyte/integration_code - -ENV 
CODE_PATH="source_file_secure" -ENV AIRBYTE_IMPL_MODULE="source_file_secure" -ENV AIRBYTE_IMPL_PATH="SourceFileSecure" - -COPY $CODE_PATH ./$CODE_PATH -RUN sed -i 's/source_file/source_file_secure/g' setup.py +COPY source_file_secure ./source_file_secure +COPY main.py ./ +COPY setup.py ./ RUN pip install . +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-file-secure -LABEL io.airbyte.version=0.1.0 diff --git a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml index ddddb348377ef..e8a25bfedf974 100644 --- a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml @@ -1,7 +1,7 @@ # See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) # for more information about how to configure these tests -# Here we tries to test a basic tests only. +# Here we tries to test a basic tests only. # The main part of tests should be executed for the source-file connector connector_image: airbyte/source-file-secure:dev tests: @@ -11,19 +11,17 @@ tests: - config_path: "integration_tests/invalid_config.json" status: "failed" # for https - - config_path: "integration_tests/https_config.json" - status: "succeed" + - config_path: "integration_tests/config.json" + status: "succeed" # for local should be failed - config_path: "integration_tests/local_config.json" - status: "exception" + status: "exception" discovery: # for https - - config_path: "integration_tests/https_config.json" + - config_path: "integration_tests/config.json" basic_read: # for https - - config_path: "integration_tests/https_config.json" - configured_catalog_path: "integration_tests/configured_https_catalog.json" - - + - config_path: "integration_tests/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-file-secure/build.gradle b/airbyte-integrations/connectors/source-file-secure/build.gradle index e4fbfaf782f21..7417c474d873f 100644 --- a/airbyte-integrations/connectors/source-file-secure/build.gradle +++ b/airbyte-integrations/connectors/source-file-secure/build.gradle @@ -8,8 +8,3 @@ plugins { airbytePython { moduleDirectory 'source_file_secure' } - -dependencies { - implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) - implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) -} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/config.json similarity index 100% rename from airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json rename to airbyte-integrations/connectors/source-file-secure/integration_tests/config.json diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_catalog.json similarity index 100% rename from airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json rename to 
airbyte-integrations/connectors/source-file-secure/integration_tests/configured_catalog.json diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json index c2d398ed6c58a..fd1448b39352f 100644 --- a/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json @@ -4,7 +4,7 @@ "reader_options": "{\"bla\": \",\", \"nrows\": 20}", "url": "https://fake-fake.com", "provider": { - "storage": "fake", + "storage": "HTTPS", "reader_impl": "fake" } } diff --git a/airbyte-integrations/connectors/source-file-secure/main_dev.py b/airbyte-integrations/connectors/source-file-secure/main.py similarity index 82% rename from airbyte-integrations/connectors/source-file-secure/main_dev.py rename to airbyte-integrations/connectors/source-file-secure/main.py index 4d942018cb520..2ba9b1095e950 100644 --- a/airbyte-integrations/connectors/source-file-secure/main_dev.py +++ b/airbyte-integrations/connectors/source-file-secure/main.py @@ -5,7 +5,7 @@ import sys -from base_python.entrypoint import launch +from airbyte_cdk.entrypoint import launch from source_file_secure import SourceFileSecure if __name__ == "__main__": diff --git a/airbyte-integrations/connectors/source-file-secure/requirements.txt b/airbyte-integrations/connectors/source-file-secure/requirements.txt index 16b5e8bf302a0..982c4b5a4923c 100644 --- a/airbyte-integrations/connectors/source-file-secure/requirements.txt +++ b/airbyte-integrations/connectors/source-file-secure/requirements.txt @@ -1,4 +1,3 @@ -# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. --e ../../bases/airbyte-protocol --e ../../bases/base-python +-e ../../bases/source-acceptance-test -e ../source-file +-e . diff --git a/airbyte-integrations/connectors/source-file-secure/setup.py b/airbyte-integrations/connectors/source-file-secure/setup.py index 15636060002a7..7345e937a1a5f 100644 --- a/airbyte-integrations/connectors/source-file-secure/setup.py +++ b/airbyte-integrations/connectors/source-file-secure/setup.py @@ -6,8 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-protocol", - "base-python", + "airbyte-cdk~=0.1", "gcsfs==0.7.1", "genson==1.2.2", "google-cloud-storage==1.35.0", @@ -31,7 +30,7 @@ ] setup( - name="source_file", + name="source_file_secure", description="Source implementation for File", author="Airbyte", author_email="contact@airbyte.io", diff --git a/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py b/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py index 323615f59d4aa..761a867576a78 100644 --- a/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py +++ b/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py @@ -6,12 +6,12 @@ import os import sys -from airbyte_protocol import ConnectorSpecification -from base_python.logger import AirbyteLogger - # some integration tests doesn't setup dependences from # requirements.txt file and Python can return a exception. 
# Thus we should to import this parent module manually +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.models import ConnectorSpecification + try: import source_file.source except ModuleNotFoundError: diff --git a/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py index 2c20878f20e9d..61568885be2b1 100644 --- a/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py @@ -3,7 +3,7 @@ # import pytest -from base_python.logger import AirbyteLogger +from airbyte_cdk import AirbyteLogger from source_file_secure import SourceFileSecure from source_file_secure.source import LOCAL_STORAGE_NAME From a0c895ecd8d944d8544a37574a9e5f652ee3c626 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Wed, 3 Nov 2021 15:04:53 -0700 Subject: [PATCH 37/83] Bump Airbyte version from 0.30.27-alpha to 0.30.28-alpha (#7608) Co-authored-by: airbyte-jenny --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 0ddee4521505c..c279a1c11ebff 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.27-alpha +current_version = 0.30.28-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
diff --git a/.env b/.env index 37865db7292d5..67b26a4a6709a 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.27-alpha +VERSION=0.30.28-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 3e6c01c25599a..e9e582691cb83 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.27-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.28-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.27-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.28-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index cd8ce2180c2a6..8e784da7fdfbd 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.27-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.28-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.27-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.28-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index f46ebd18eb3f6..297a515acd143 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.27-alpha", + "version": "0.30.28-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 0fe0613ccc0dc..9716c152fabeb 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.27-alpha", + "version": "0.30.28-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index adbdf161edcb9..05240bf9caae1 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.27-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.28-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.27-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.28-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index a0b4c91c94e75..533ca2a8df9b4 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.30.27-alpha" +appVersion: "0.30.28-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 1f6f355087a8c..e62bba545b4a7 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.27-alpha` | +| `worker.image.tag` | The airbyte worker image tag. 
Defaults to the chart's AppVersion | `0.30.28-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 593def5d6fe1b..712f4ee41d84f 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.27-alpha + tag: 0.30.28-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.27-alpha + tag: 0.30.28-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.27-alpha + tag: 0.30.28-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.27-alpha + tag: 0.30.28-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 953feb8df7121..45743b3e884fd 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.27-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.28-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index b008ae065197d..b07e82a9d35f7 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.27-alpha +AIRBYTE_VERSION=0.30.28-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 4dfe2c2ed9e9b..5e082c7cc9d41 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/scheduler - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/server - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/webapp - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/worker - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index b008ae065197d..b07e82a9d35f7 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.27-alpha +AIRBYTE_VERSION=0.30.28-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 25a57a2eedbe4..76298e22daee5 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/scheduler - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/server - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/webapp - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: airbyte/worker - newTag: 0.30.27-alpha + newTag: 0.30.28-alpha - name: temporalio/auto-setup newTag: 1.7.0 From b6a93c4e09d8d091d2e0744b2006558dcae3dd00 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Wed, 3 Nov 2021 20:34:08 -0700 Subject: [PATCH 38/83] Update Databricks doc (#7618) --- docs/integrations/destinations/databricks.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index 761a6eb4c9719..e14fd99fe7fde 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -103,7 +103,6 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.2 | 2021-10-25 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | +| 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. 
| | 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | | 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. | - From 9ef8626639ba7b24d650a9da3f3512df3424aaca Mon Sep 17 00:00:00 2001 From: "Sherif A. Nada" Date: Wed, 3 Nov 2021 22:22:49 -0700 Subject: [PATCH 39/83] =?UTF-8?q?=F0=9F=90=9BSource=20Iterable:=20Fix=20da?= =?UTF-8?q?te=20parsing=20(#7619)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/resources/seed/source_definitions.yaml | 2 +- .../bases/source-acceptance-test/CHANGELOG.md | 3 +++ .../bases/source-acceptance-test/Dockerfile | 2 +- .../source_acceptance_test/tests/test_incremental.py | 10 +++++----- .../unit_tests/test_json_schema_helper.py | 6 +++--- .../connectors/source-iterable/Dockerfile | 2 +- .../source-iterable/acceptance-test-docker.sh | 2 +- .../integration_tests/configured_catalog.json | 11 +++++++++++ .../connectors/source-iterable/source_iterable/api.py | 9 +++------ .../source_iterable/schemas/templates.json | 3 ++- docs/integrations/sources/iterable.md | 1 + 11 files changed, 32 insertions(+), 19 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1ca547738bf99..9d64c0b78dbc2 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -262,7 +262,7 @@ - name: Iterable sourceDefinitionId: 2e875208-0c0b-4ee4-9e92-1cb3156ea799 dockerRepository: airbyte/source-iterable - dockerImageTag: 0.1.10 + dockerImageTag: 0.1.11 documentationUrl: https://docs.airbyte.io/integrations/sources/iterable sourceType: api - name: Jira diff --git a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md index 6d6729b3f611a..c47226fda6e9b 100644 --- a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md +++ b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.28 +Print stream name when incremental sync tests fail + ## 0.1.27 Add ignored fields for full refresh test (unit tests) diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index f13ecdebf4ed0..22f70e298ff9a 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -9,7 +9,7 @@ COPY setup.py ./ COPY pytest.ini ./ RUN pip install . 
-LABEL io.airbyte.version=0.1.27 +LABEL io.airbyte.version=0.1.28 LABEL io.airbyte.name=airbyte/source-acceptance-test ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin"] diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py index e7fdf1cf2ca0b..dc9db26749c95 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py @@ -73,7 +73,7 @@ def records_with_state(records, state, stream_mapping, state_cursor_paths) -> It except KeyError: # try second time as an absolute path in state file (i.e. bookmarks -> stream_name -> column -> value) state_value = cursor_field.parse(record=state, path=state_cursor_paths[stream_name]) - yield record_value, state_value + yield record_value, state_value, stream_name @pytest.mark.default_timeout(20 * 60) @@ -89,18 +89,18 @@ def test_two_sequential_reads(self, connector_config, configured_catalog_for_inc assert records_1, "Should produce at least one record" latest_state = states_1[-1].state.data - for record_value, state_value in records_with_state(records_1, latest_state, stream_mapping, cursor_paths): + for record_value, state_value, stream_name in records_with_state(records_1, latest_state, stream_mapping, cursor_paths): assert ( record_value <= state_value - ), "First incremental sync should produce records younger or equal to cursor value from the state" + ), f"First incremental sync should produce records younger or equal to cursor value from the state. Stream: {stream_name}" output = docker_runner.call_read_with_state(connector_config, configured_catalog_for_incremental, state=latest_state) records_2 = filter_output(output, type_=Type.RECORD) - for record_value, state_value in records_with_state(records_2, latest_state, stream_mapping, cursor_paths): + for record_value, state_value, stream_name in records_with_state(records_2, latest_state, stream_mapping, cursor_paths): assert ( record_value >= state_value - ), "Second incremental sync should produce records older or equal to cursor value from the state" + ), f"Second incremental sync should produce records older or equal to cursor value from the state. 
Stream: {stream_name}" def test_state_with_abnormally_large_values(self, connector_config, configured_catalog, future_state, docker_runner: ConnectorRunner): configured_catalog = incremental_only_catalog(configured_catalog) diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py index 3a9a433704d5c..11478abed4852 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_json_schema_helper.py @@ -85,7 +85,7 @@ def test_simple_path(records, stream_mapping, simple_state): paths = {"my_stream": ["id"]} result = records_with_state(records=records, state=simple_state, stream_mapping=stream_mapping, state_cursor_paths=paths) - record_value, state_value = next(result) + record_value, state_value, stream_name = next(result) assert record_value == 1, "record value must be correctly found" assert state_value == 11, "state value must be correctly found" @@ -96,7 +96,7 @@ def test_nested_path(records, stream_mapping, nested_state): paths = {"my_stream": ["some_account_id", "ts_updated"]} result = records_with_state(records=records, state=nested_state, stream_mapping=stream_mapping, state_cursor_paths=paths) - record_value, state_value = next(result) + record_value, state_value, stream_name = next(result) assert record_value == pendulum.datetime(2015, 5, 1), "record value must be correctly found" assert state_value == pendulum.datetime(2015, 1, 1, 22, 3, 11), "state value must be correctly found" @@ -116,7 +116,7 @@ def test_absolute_path(records, stream_mapping, singer_state): paths = {"my_stream": ["bookmarks", "my_stream", "ts_created"]} result = records_with_state(records=records, state=singer_state, stream_mapping=stream_mapping, state_cursor_paths=paths) - record_value, state_value = next(result) + record_value, state_value, stream_name = next(result) assert record_value == pendulum.datetime(2015, 11, 1, 22, 3, 11), "record value must be correctly found" assert state_value == pendulum.datetime(2014, 1, 1, 22, 3, 11), "state value must be correctly found" diff --git a/airbyte-integrations/connectors/source-iterable/Dockerfile b/airbyte-integrations/connectors/source-iterable/Dockerfile index 39127abee7bcb..b24c983735c7c 100644 --- a/airbyte-integrations/connectors/source-iterable/Dockerfile +++ b/airbyte-integrations/connectors/source-iterable/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.10 +LABEL io.airbyte.version=0.1.11 LABEL io.airbyte.name=airbyte/source-iterable diff --git a/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh index c522eebbd94e8..4ceedd9e7ba03 100755 --- a/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh +++ b/airbyte-integrations/connectors/source-iterable/acceptance-test-docker.sh @@ -1,7 +1,7 @@ #!/usr/bin/env sh # Build latest connector image -docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2):dev # Pull latest acctest image docker pull airbyte/source-acceptance-test:latest diff --git a/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json index a6392effd040c..e4a8426cc2d24 100644 --- a/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-iterable/integration_tests/configured_catalog.json @@ -170,6 +170,17 @@ }, "sync_mode": "incremental", "destination_sync_mode": "append" + }, + { + "stream": { + "name": "templates", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["createdAt"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" } ] } diff --git a/airbyte-integrations/connectors/source-iterable/source_iterable/api.py b/airbyte-integrations/connectors/source-iterable/source_iterable/api.py index 82a7074f42f2a..d3d9eb77870d5 100755 --- a/airbyte-integrations/connectors/source-iterable/source_iterable/api.py +++ b/airbyte-integrations/connectors/source-iterable/source_iterable/api.py @@ -88,12 +88,8 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late """ latest_benchmark = latest_record[self.cursor_field] if current_stream_state.get(self.cursor_field): - return { - self.cursor_field: max( - latest_benchmark, self._field_to_datetime(current_stream_state[self.cursor_field]) - ).to_datetime_string() - } - return {self.cursor_field: latest_benchmark.to_datetime_string()} + return {self.cursor_field: str(max(latest_benchmark, self._field_to_datetime(current_stream_state[self.cursor_field])))} + return {self.cursor_field: str(latest_benchmark)} def request_params(self, stream_state: Mapping[str, Any], **kwargs) -> MutableMapping[str, Any]: @@ -362,6 +358,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp records = response_json.get(self.data_field, []) for record in records: + record[self.cursor_field] = self._field_to_datetime(record[self.cursor_field]) yield record diff --git a/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json b/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json index 8ea961911c394..b4ad623658b4a 100644 --- a/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json +++ b/airbyte-integrations/connectors/source-iterable/source_iterable/schemas/templates.json @@ -4,7 +4,8 @@ "type": ["null", "number"] }, "createdAt": { - "type": ["null", "integer"] + "type": ["null", "string"], + "format": "date-time" }, "updatedAt": { "type": ["null", "integer"] diff --git a/docs/integrations/sources/iterable.md b/docs/integrations/sources/iterable.md index 8310d6cf75f52..db20b0671f304 100644 --- a/docs/integrations/sources/iterable.md +++ b/docs/integrations/sources/iterable.md @@ -58,6 +58,7 @@ Please read [How to find your API key](https://support.iterable.com/hc/en-us/art | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| `0.1.11` | 2021-11-03 | [7619](https://github.com/airbytehq/airbyte/pull/7619) | Bugfix type error while incrementally loading the `Templates` stream | | `0.1.10` | 2021-11-03 | [7591](https://github.com/airbytehq/airbyte/pull/7591) | Optimize export streams 
memory consumption for large requests | | `0.1.9` | 2021-10-06 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Enable campaign_metrics stream | | `0.1.8` | 2021-09-20 | [5915](https://github.com/airbytehq/airbyte/pull/5915) | Add new streams: campaign_metrics, events | From 92dbbfe9add528912ed963e2c8d86609035e25a8 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 4 Nov 2021 00:49:07 -0700 Subject: [PATCH 40/83] Bump Airbyte version from 0.30.28-alpha to 0.30.29-alpha (#7621) Co-authored-by: sherifnada --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c279a1c11ebff..3b662d499d6aa 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.28-alpha +current_version = 0.30.29-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 67b26a4a6709a..5bb32384124ce 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.28-alpha +VERSION=0.30.29-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index e9e582691cb83..c6acbf82d89fa 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.28-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.29-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.28-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.29-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 8e784da7fdfbd..00be03200c18b 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.28-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.29-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.28-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.29-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 297a515acd143..528b169f6436b 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.28-alpha", + "version": "0.30.29-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 9716c152fabeb..d751f8f2e8b86 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": 
"airbyte-webapp", - "version": "0.30.28-alpha", + "version": "0.30.29-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 05240bf9caae1..6eb10a69a08fe 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.28-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.29-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.28-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.29-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index 533ca2a8df9b4..ce83ad1ee5892 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.28-alpha" +appVersion: "0.30.29-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index e62bba545b4a7..f0b7f5ebe14f2 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.30.29-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.28-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 712f4ee41d84f..40cf37e92149e 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.28-alpha + tag: 0.30.29-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.28-alpha + tag: 0.30.29-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.28-alpha + tag: 0.30.29-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.28-alpha + tag: 0.30.29-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 45743b3e884fd..709c3dda667a1 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.28-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.29-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index b07e82a9d35f7..263474442545d 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.28-alpha +AIRBYTE_VERSION=0.30.29-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 5e082c7cc9d41..2588aa9fb49ef 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/scheduler - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/server - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/webapp - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/worker - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index b07e82a9d35f7..263474442545d 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.28-alpha +AIRBYTE_VERSION=0.30.29-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 76298e22daee5..2c625632bf39a 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/scheduler - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/server - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/webapp - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: airbyte/worker - newTag: 0.30.28-alpha + newTag: 0.30.29-alpha - name: temporalio/auto-setup newTag: 1.7.0 From a377ac734094c97f32ca450386312af84234496e Mon Sep 17 00:00:00 2001 From: Dmytro Date: Thu, 4 Nov 2021 10:03:32 +0200 Subject: [PATCH 41/83] Facebook pages: generate Page token (#7440) --- .../010eb12f-837b-4685-892d-0a39f76a98f5.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../tests/test_full_refresh.py | 5 +++++ .../source-facebook-pages/Dockerfile | 2 +- .../acceptance-test-config.yml | 7 ++++++ .../source_facebook_pages/source.py | 22 ++++++++++++++++--- .../source_facebook_pages/spec.json | 4 +++- docs/integrations/sources/facebook-pages.md | 1 + 8 files changed, 38 insertions(+), 7 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json index 701207e5790ff..58ceb7512f0c9 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json +++ 
b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/010eb12f-837b-4685-892d-0a39f76a98f5.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "010eb12f-837b-4685-892d-0a39f76a98f5", "name": "Facebook Pages", "dockerRepository": "airbyte/source-facebook-pages", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://hub.docker.com/r/airbyte/source-facebook-pages", "icon": "facebook.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 9d64c0b78dbc2..1cd4629c6152e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -138,7 +138,7 @@ - name: Facebook Pages sourceDefinitionId: 010eb12f-837b-4685-892d-0a39f76a98f5 dockerRepository: airbyte/source-facebook-pages - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://hub.docker.com/r/airbyte/source-facebook-pages icon: facebook.svg sourceType: api diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py index 1eaae862cf8f6..1d78264ef0261 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_full_refresh.py @@ -45,5 +45,10 @@ def test_sequential_reads( if output_diff: msg = f"{stream}: the two sequential reads should produce either equal set of records or one of them is a strict subset of the other" detailed_logger.info(msg) + detailed_logger.info("First read") + detailed_logger.log_json_list(stream_records_1) + detailed_logger.info("Second read") + detailed_logger.log_json_list(stream_records_2) + detailed_logger.info("Difference") detailed_logger.log_json_list(output_diff) pytest.fail(msg) diff --git a/airbyte-integrations/connectors/source-facebook-pages/Dockerfile b/airbyte-integrations/connectors/source-facebook-pages/Dockerfile index ae7872f9c6ea5..58db26cc4f3be 100644 --- a/airbyte-integrations/connectors/source-facebook-pages/Dockerfile +++ b/airbyte-integrations/connectors/source-facebook-pages/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-facebook-pages diff --git a/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml b/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml index 88c7f8e4f8fac..f78b8aefb46a3 100644 --- a/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-facebook-pages/acceptance-test-config.yml @@ -18,3 +18,10 @@ tests: full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + ignored_fields: + # Since we are generating a new Page access token for each connector run, this + # field would contain different values.
+ page: + - "posts/paging/next" + - "published_posts/paging/next" + - "feed/paging/next" diff --git a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py index 0a0c670d7d6e3..de2ddc87497f1 100755 --- a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py +++ b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/source.py @@ -5,6 +5,7 @@ from typing import Any, List, Mapping, Tuple +import requests from airbyte_cdk.logger import AirbyteLogger from airbyte_cdk.models import SyncMode from airbyte_cdk.sources import AbstractSource @@ -19,17 +20,32 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> error_msg = None try: - _ = list(Page(access_token=config["access_token"], page_id=config["page_id"]).read_records(sync_mode=SyncMode.full_refresh)) + access_token, page_id = config["access_token"], config["page_id"] + access_token = self.generate_page_access_token(page_id, access_token) + _ = list(Page(access_token=access_token, page_id=page_id).read_records(sync_mode=SyncMode.full_refresh)) ok = True except Exception as e: error_msg = repr(e) return ok, error_msg + @staticmethod + def generate_page_access_token(page_id: str, access_token: str) -> str: + # We are expecting to receive User access token from config. To access + # Pages API we need to generate Page access token. Page access tokens + # can be generated from another Page access token (with the same page ID) + # so if user manually set Page access token instead of User access + # token it would be no problem unless it has wrong page ID. + # https://developers.facebook.com/docs/pages/access-tokens#get-a-page-access-token + r = requests.get(f"https://graph.facebook.com/{page_id}", params={"fields": "access_token", "access_token": access_token}) + return r.json()["access_token"] + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + access_token, page_id = config["access_token"], config["page_id"] + access_token = self.generate_page_access_token(page_id, access_token) stream_kwargs = { - "access_token": config["access_token"], - "page_id": config["page_id"], + "access_token": access_token, + "page_id": page_id, } streams = [ diff --git a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json index a8745f64b8308..0320a9f340f0f 100755 --- a/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json +++ b/airbyte-integrations/connectors/source-facebook-pages/source_facebook_pages/spec.json @@ -5,15 +5,17 @@ "title": "Facebook Pages Spec", "type": "object", "required": ["access_token", "page_id"], - "additionalProperties": false, + "additionalProperties": true, "properties": { "access_token": { "type": "string", + "title": "Page Access Token", "description": "Facebook Page Access Token", "airbyte_secret": true }, "page_id": { "type": "string", + "title": "Page ID", "description": "Page ID" } } diff --git a/docs/integrations/sources/facebook-pages.md b/docs/integrations/sources/facebook-pages.md index 4a494e6d826b3..4336e4895255d 100644 --- a/docs/integrations/sources/facebook-pages.md +++ b/docs/integrations/sources/facebook-pages.md @@ -83,6 +83,7 @@ You can easily get the page id from the page url. 
For example, if you have a pag | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-10-28 | [7440](https://github.com/airbytehq/airbyte/pull/7440) | Generate Page token from config access token | | 0.1.2 | 2021-10-18 | [7128](https://github.com/airbytehq/airbyte/pull/7128) | Upgrade Facebook API to v.12 | | 0.1.1 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | | 0.1.0 | 2021-09-01 | [5158](https://github.com/airbytehq/airbyte/pull/5158) | Initial Release | From 7c3b7ef48943f85fa3330fcc2c4877bad1c05343 Mon Sep 17 00:00:00 2001 From: Alexander Tsukanov Date: Thu, 4 Nov 2021 12:18:44 +0200 Subject: =?UTF-8?q?=F0=9F=90=9B=20Source=20MySQL:=20Value=20?= =?UTF-8?q?'65535'=20is=20outside=20valid=20range=20for=20type=20(#7559)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * BUG-5842: Mysql Value '65535' is outside valid range for type java.lang.Short * BUG-5842: Bumped versions of connector. * BUG-5842: Fixed unit-test for mysql-encrypt * Update docs/integrations/sources/mysql.md Co-authored-by: Sherif A. Nada Co-authored-by: Sherif A. Nada --- .../src/main/resources/seed/source_definitions.yaml | 2 +- .../java/io/airbyte/db/jdbc/JdbcSourceOperations.java | 11 ++++++++++- .../connectors/source-mysql-strict-encrypt/Dockerfile | 2 +- .../src/test/resources/expected_spec.json | 2 +- .../connectors/source-mysql/Dockerfile | 2 +- .../source/mysql/MySqlSourceDatatypeTest.java | 10 ++++++++++ docs/integrations/sources/mysql.md | 1 + 7 files changed, 25 insertions(+), 5 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 1cd4629c6152e..ef2b67389ff81 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -348,7 +348,7 @@ - name: MySQL sourceDefinitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad dockerRepository: airbyte/source-mysql - dockerImageTag: 0.4.8 + dockerImageTag: 0.4.9 documentationUrl: https://docs.airbyte.io/integrations/sources/mysql icon: mysql.svg sourceType: database diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java index cf06d70a979b6..239fdee0ad225 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java @@ -116,8 +116,17 @@ protected void putBoolean(final ObjectNode node, final String columnName, final node.put(columnName, resultSet.getBoolean(index)); } + /** + * In some sources Short might have value larger than {@link Short#MAX_VALUE}. E.g. MySQL has + * unsigned smallint type, which can contain value 65535. If we fail to cast Short value, we + * will try to cast Integer.
+ */ protected void putShortInt(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException { - node.put(columnName, resultSet.getShort(index)); + try { + node.put(columnName, resultSet.getShort(index)); + } catch (final SQLException e) { + node.put(columnName, DataTypeUtils.returnNullIfInvalid(() -> resultSet.getInt(index))); + } } /** diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile index 0710969d42385..d6929a0b4878e 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/source-mysql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json index 42e1c5104e518..d26dd1d611a6e 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/resources/expected_spec.json @@ -46,7 +46,7 @@ "type": "string", "title": "Replication Method", "description": "Replication method to use for extracting data from the database. STANDARD replication requires no setup on the DB side but will not be able to represent deletions incrementally. CDC uses the Binlog to detect inserts, updates, and deletes. 
This needs to be configured on the source database itself.", - "order": 6, + "order": 7, "default": "STANDARD", "enum": ["STANDARD", "CDC"] } diff --git a/airbyte-integrations/connectors/source-mysql/Dockerfile b/airbyte-integrations/connectors/source-mysql/Dockerfile index b223be5a9f20a..69879005089e2 100644 --- a/airbyte-integrations/connectors/source-mysql/Dockerfile +++ b/airbyte-integrations/connectors/source-mysql/Dockerfile @@ -8,6 +8,6 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.4.8 +LABEL io.airbyte.version=0.4.9 LABEL io.airbyte.name=airbyte/source-mysql diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java index 0c3827c89189a..c16afb62e5873 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java @@ -102,6 +102,16 @@ protected void initTests() { .addExpectedValues("1") .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("smallint") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .fullSourceDataType("smallint unsigned") + .addInsertValues("null", "0", "65535") + .addExpectedValues(null, "0", "65535") + .build()); + addDataTypeTestData( TestDataHolder.builder() .sourceType("mediumint") diff --git a/docs/integrations/sources/mysql.md b/docs/integrations/sources/mysql.md index 3f34993f79545..d502cd5f6de3b 100644 --- a/docs/integrations/sources/mysql.md +++ b/docs/integrations/sources/mysql.md @@ -180,6 +180,7 @@ If you do not see a type in this list, assume that it is coerced into a string. 
| Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.4.9 | 2021-11-02 | [7559](https://github.com/airbytehq/airbyte/pull/7559) | Correctly process large unsigned short integer values which may fall outside java's `Short` data type capability| | 0.4.8 | 2021-09-16 | [6093](https://github.com/airbytehq/airbyte/pull/6093) | Improve reliability of processing various data types like decimals, dates, datetime, binary, and text | | 0.4.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | | 0.4.6 | 2021-09-29 | [6510](https://github.com/airbytehq/airbyte/pull/6510) | Support SSL connection | From cddfeea9a933cf26d9c16701a343b18d04e7e9f9 Mon Sep 17 00:00:00 2001 From: Subodh Kant Chaturvedi Date: Thu, 4 Nov 2021 19:47:23 +0530 Subject: [PATCH 43/83] format master (#7633) --- .../airbyte/db/jdbc/JdbcSourceOperations.java | 4 +- .../BigQueryDenormalizedRecordConsumer.java | 8 +- .../BigQueryDenormalizedDestinationTest.java | 6 +- .../BigQueryDenormalizedTestDataUtils.java | 7 +- .../destination/bigquery/BigQueryUtils.java | 11 +- .../s3/S3AvroDestinationAcceptanceTest.java | 1 - .../s3/avro/JsonToAvroConverterTest.java | 3 +- .../json_conversion_test_cases.json | 142 ++++++------------ .../type_conversion_test_cases.json | 17 +-- .../source/mysql/MySqlSourceDatatypeTest.java | 1 - 10 files changed, 78 insertions(+), 122 deletions(-) diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java index 239fdee0ad225..6b6ec98c34ff5 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcSourceOperations.java @@ -118,8 +118,8 @@ protected void putBoolean(final ObjectNode node, final String columnName, final /** * In some sources Short might have value larger than {@link Short#MAX_VALUE}. E.g. MySQL has - * unsigned smallint type, which can contain value 65535. If we fail to cast Short value, we - * will try to cast Integer. + * unsigned smallint type, which can contain value 65535. If we fail to cast Short value, we will + * try to cast Integer.
*/ protected void putShortInt(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException { try { diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java index 9048dab2a3b65..19c03205692a0 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedRecordConsumer.java @@ -40,10 +40,10 @@ public class BigQueryDenormalizedRecordConsumer extends BigQueryRecordConsumer { private final Set invalidKeys; public BigQueryDenormalizedRecordConsumer(final BigQuery bigquery, - final Map writeConfigs, - final ConfiguredAirbyteCatalog catalog, - final Consumer outputRecordCollector, - final StandardNameTransformer namingResolver) { + final Map writeConfigs, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector, + final StandardNameTransformer namingResolver) { super(bigquery, writeConfigs, catalog, outputRecordCollector, false, false); this.namingResolver = namingResolver; invalidKeys = new HashSet<>(); diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java index aa13e9fb02c49..4ffcd7ccc5b76 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java @@ -237,8 +237,9 @@ void testIfJSONDateTimeWasConvertedToBigQueryFormat() throws Exception { // BigQuery Accepts "YYYY-MM-DD HH:MM:SS[.SSSSSS]" format // returns "yyyy-MM-dd'T'HH:mm:ss" format assertEquals(Set.of(new DateTime("2021-10-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), extractJsonValues(resultJson, "updated_at")); - //check nested datetime - assertEquals(Set.of(new DateTime("2021-11-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), extractJsonValues(resultJson.get("items"), "nested_datetime")); + // check nested datetime + assertEquals(Set.of(new DateTime("2021-11-11T06:36:53+00:00").toString("yyyy-MM-dd'T'HH:mm:ss")), + extractJsonValues(resultJson.get("items"), "nested_datetime")); } private Set extractJsonValues(final JsonNode node, final String attributeName) { @@ -278,4 +279,5 @@ private static Stream schemaAndDataProvider() { arguments(getSchemaWithInvalidArrayType(), MESSAGE_USERS1), arguments(getSchema(), MESSAGE_USERS2)); } + } diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java 
b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java index c2fa24cdec102..2c1fbea15e8e9 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/util/BigQueryDenormalizedTestDataUtils.java @@ -1,10 +1,14 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.destination.bigquery.util; import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; public class BigQueryDenormalizedTestDataUtils { - + public static JsonNode getSchema() { return Jsons.deserialize( "{\n" @@ -221,4 +225,5 @@ public static JsonNode getDataWithEmptyObjectAndArray() { + " ]\n" + "}"); } + } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index 613ec652407ff..96fd7d4d6c2a3 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -29,7 +29,6 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.protocol.models.AirbyteRecordMessage; import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -172,9 +171,10 @@ public static List getDateTimeFieldsFromSchema(FieldList fieldList) { * @param dateTimeFields - list contains fields of DATETIME format * @param data - Json will be sent to Google BigData service * - * The special DATETIME format is required to save this type to BigQuery. - * @see Supported Google bigquery datatype - * This method is responsible to adapt JSON DATETIME to Bigquery + * The special DATETIME format is required to save this type to BigQuery. 
+ * @see Supported + * Google bigquery datatype This method is responsible to adapt JSON DATETIME to Bigquery */ public static void transformJsonDateTimeToBigDataFormat(List dateTimeFields, ObjectNode data) { dateTimeFields.forEach(e -> { @@ -183,10 +183,11 @@ public static void transformJsonDateTimeToBigDataFormat(List dateTimeFie .dateTime(new DateTime(data .findValue(e) .asText()) - .toString(BIG_QUERY_DATETIME_FORMAT)) + .toString(BIG_QUERY_DATETIME_FORMAT)) .getValue(); data.put(e, googleBigQueryDateFormat); } }); } + } diff --git a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java index 9626c7ed5f8d0..28c21602a7cee 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test-integration/java/io/airbyte/integrations/destination/s3/S3AvroDestinationAcceptanceTest.java @@ -19,7 +19,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericDatumReader; -import tech.allegro.schema.json2avro.converter.JsonAvroConverter; public class S3AvroDestinationAcceptanceTest extends S3DestinationAcceptanceTest { diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java index 4c7d6eef6a6ba..2a90a03c25827 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroConverterTest.java @@ -108,7 +108,8 @@ public void testJsonAvroConversion(final String schemaName, final JsonNode jsonSchema, final JsonNode jsonObject, final JsonNode avroSchema, - final JsonNode avroObject) throws Exception { + final JsonNode avroObject) + throws Exception { final Schema actualAvroSchema = SCHEMA_CONVERTER.getAvroSchema(jsonSchema, schemaName, namespace, appendAirbyteFields); assertEquals( avroSchema, diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json index 212ec9959fba1..c611474f080f5 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json @@ -25,12 +25,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -99,12 +96,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - 
"default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] } @@ -112,12 +106,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -173,11 +164,8 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], "default": null } ] @@ -217,12 +205,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -257,12 +242,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -306,12 +288,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -366,12 +345,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -416,12 +392,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -482,12 +455,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] } @@ -495,12 +465,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -551,12 +518,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -592,12 +556,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -640,12 +601,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": 
"_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -692,12 +650,9 @@ "default": null }, { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, @@ -728,12 +683,9 @@ "namespace": "namespace14", "fields": [ { - "name":"_airbyte_additional_properties", - "type":[ - "null", - { "type":"map", "values":"string" } - ], - "default":null + "name": "_airbyte_additional_properties", + "type": ["null", { "type": "map", "values": "string" }], + "default": null } ] }, diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json index 306d1213ab296..3171888d27340 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/type_conversion_test_cases.json @@ -102,10 +102,7 @@ }, { "name": "_airbyte_additional_properties", - "type": [ - "null", - {"type": "map", "values": "string"} - ], + "type": ["null", { "type": "map", "values": "string" }], "default": null } ] @@ -125,7 +122,7 @@ "fields": [ { "name": "_airbyte_additional_properties", - "type": ["null", {"type": "map", "values": "string"}], + "type": ["null", { "type": "map", "values": "string" }], "default": null } ] @@ -137,33 +134,33 @@ "jsonFieldSchema": { "type": "object" }, - "avroFieldType": ["null", {"type":"map","values":"string"}] + "avroFieldType": ["null", { "type": "map", "values": "string" }] }, { "fieldName": "_ab_additional_properties", "jsonFieldSchema": { "type": "object" }, - "avroFieldType": ["null", {"type":"map","values":"string"}] + "avroFieldType": ["null", { "type": "map", "values": "string" }] }, { "fieldName": "any_of_field", "jsonFieldSchema": { - "anyOf": [{"type": "string"}, {"type": "integer"}] + "anyOf": [{ "type": "string" }, { "type": "integer" }] }, "avroFieldType": ["null", "string", "int"] }, { "fieldName": "all_of_field", "jsonFieldSchema": { - "allOf": [{"type": "string"}, {"type": "integer"}] + "allOf": [{ "type": "string" }, { "type": "integer" }] }, "avroFieldType": ["null", "string", "int"] }, { "fieldName": "one_of_field", "jsonFieldSchema": { - "oneOf": [{"type": "string"}, {"type": "integer"}] + "oneOf": [{ "type": "string" }, { "type": "integer" }] }, "avroFieldType": ["null", "string", "int"] } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java index c16afb62e5873..d314fef26ab28 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceDatatypeTest.java @@ -102,7 +102,6 @@ protected void initTests() { .addExpectedValues("1") .build()); - addDataTypeTestData( TestDataHolder.builder() .sourceType("smallint") From 
ad91b042b686f0017ee4629f916b4eb7ab9a19e5 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 4 Nov 2021 14:02:51 -0700 Subject: [PATCH 44/83] Bump Airbyte version from 0.30.29-alpha to 0.30.30-alpha (#7651) Co-authored-by: sherifnada --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 3b662d499d6aa..3d4b2e2d1b4d1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.29-alpha +current_version = 0.30.30-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 5bb32384124ce..cab5426a970c0 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.29-alpha +VERSION=0.30.30-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index c6acbf82d89fa..93013fabec3f3 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.29-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.30-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.29-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.30-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 00be03200c18b..2538836be92a8 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.29-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.30-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.29-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.30-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 528b169f6436b..c71bd0c6670f2 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.29-alpha", + "version": "0.30.30-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index d751f8f2e8b86..efb541fe5803a 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.29-alpha", + "version": "0.30.30-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 6eb10a69a08fe..1f8af0b48530f 100644 --- a/airbyte-workers/Dockerfile +++ 
b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.29-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.30-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.29-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.30-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index ce83ad1ee5892..f3c1543c90491 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.29-alpha" +appVersion: "0.30.30-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index f0b7f5ebe14f2..be62e41092d22 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.30.30-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.29-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 40cf37e92149e..24ddd1f04c1a9 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.29-alpha + tag: 0.30.30-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.29-alpha + tag: 0.30.30-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.29-alpha + tag: 0.30.30-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.29-alpha + tag: 0.30.30-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 709c3dda667a1..6cc26a272052b 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.29-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.30-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 263474442545d..9fa79fe31efc7 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.29-alpha +AIRBYTE_VERSION=0.30.30-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 2588aa9fb49ef..e28d687e012cf 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/scheduler - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/server - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/webapp - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/worker - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 263474442545d..9fa79fe31efc7 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.29-alpha +AIRBYTE_VERSION=0.30.30-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 2c625632bf39a..0434fee12cbfb 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/scheduler - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/server - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/webapp - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: airbyte/worker - newTag: 0.30.29-alpha + newTag: 0.30.30-alpha - name: temporalio/auto-setup newTag: 1.7.0 From b863698cf5b48d61d27216b63859ae5e34a164a7 Mon Sep 17 00:00:00 2001 From: "Sherif A. Nada" Date: Thu, 4 Nov 2021 14:08:18 -0700 Subject: [PATCH 45/83] log oauth flow failures (#7648) --- .../java/io/airbyte/oauth/BaseOAuthFlow.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index 02a8b8af83027..b0273aeba30da 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -26,14 +26,19 @@ import java.util.UUID; import java.util.function.Function; import java.util.function.Supplier; + import org.apache.commons.lang3.RandomStringUtils; import org.apache.http.client.utils.URIBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /* * Class implementing generic oAuth 2.0 flow. 
*/ public abstract class BaseOAuthFlow extends BaseOAuthConfig { + private static final Logger LOGGER = LoggerFactory.getLogger(BaseOAuthFlow.class); + /** * Simple enum of content type strings and their respective encoding functions used for POSTing the * access token request @@ -141,10 +146,10 @@ protected String getState() { @Override public Map completeSourceOAuth( - final UUID workspaceId, - final UUID sourceDefinitionId, - final Map queryParams, - final String redirectUrl) + final UUID workspaceId, + final UUID sourceDefinitionId, + final Map queryParams, + final String redirectUrl) throws IOException, ConfigNotFoundException { final JsonNode oAuthParamConfig = getSourceOAuthParamConfig(workspaceId, sourceDefinitionId); return completeOAuthFlow( @@ -183,7 +188,6 @@ protected Map completeOAuthFlow(final String clientId, .header("Content-Type", tokenReqContentType.contentType) .header("Accept", "application/json") .build(); - // TODO: Handle error response to report better messages try { final HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); return extractRefreshToken(Jsons.deserialize(response.body()), accessTokenUrl); @@ -230,7 +234,8 @@ protected Map extractRefreshToken(final JsonNode data, String ac } else if (data.has("access_token")) { result.put("access_token", data.get("access_token").asText()); } else { - throw new IOException(String.format("Missing 'refresh_token' in query params from %s", accessTokenUrl)); + LOGGER.info("Oauth flow failed. Data received from server: {}", data); + throw new IOException(String.format("Missing 'refresh_token' in query params from %s. Response: %s", accessTokenUrl, data)); } return Map.of("credentials", result); From 8cf5ecf56830ca3927c918b9d319994b4ebcb85c Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 4 Nov 2021 16:39:16 -0700 Subject: [PATCH 46/83] fix container specific logging (#7657) * fix container specific logging * fix test --- .../io/airbyte/commons/io/LineGobbler.java | 24 +++++++++---------- .../io/airbyte/commons/logging/MdcScope.java | 2 +- airbyte-commons/src/main/resources/log4j2.xml | 2 +- .../workers/DbtTransformationRunner.java | 10 ++++---- .../DefaultNormalizationRunner.java | 9 ++++--- .../airbyte/DefaultAirbyteDestination.java | 9 ++++--- .../airbyte/DefaultAirbyteSource.java | 9 ++++--- .../airbyte/DefaultAirbyteStreamFactory.java | 17 +++++++------ .../DefaultAirbyteStreamFactoryTest.java | 2 +- 9 files changed, 39 insertions(+), 45 deletions(-) diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java b/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java index 6e87ce1f45172..0918ece248910 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/io/LineGobbler.java @@ -22,17 +22,17 @@ public class LineGobbler implements VoidCallable { private final static Logger LOGGER = LoggerFactory.getLogger(LineGobbler.class); public static void gobble(final InputStream is, final Consumer consumer) { - gobble(is, consumer, "generic", MdcScope.DEFAULT); + gobble(is, consumer, "generic", MdcScope.DEFAULT_BUILDER); } - public static void gobble(final InputStream is, final Consumer consumer, final MdcScope mdcScope) { - gobble(is, consumer, "generic", mdcScope); + public static void gobble(final InputStream is, final Consumer consumer, final MdcScope.Builder mdcScopeBuilder) { + gobble(is, consumer, "generic", mdcScopeBuilder); + } - public static void gobble(final InputStream is,
final Consumer consumer, final String caller, final MdcScope mdcScope) { + public static void gobble(final InputStream is, final Consumer consumer, final String caller, final MdcScope.Builder mdcScopeBuilder) { final ExecutorService executor = Executors.newSingleThreadExecutor(); final Map mdc = MDC.getCopyOfContextMap(); - final var gobbler = new LineGobbler(is, consumer, executor, mdc, caller, mdcScope); + final var gobbler = new LineGobbler(is, consumer, executor, mdc, caller, mdcScopeBuilder); executor.submit(gobbler); } @@ -41,21 +41,21 @@ public static void gobble(final InputStream is, final Consumer consumer, private final ExecutorService executor; private final Map mdc; private final String caller; - private final MdcScope containerLogMDC; + private final MdcScope.Builder containerLogMdcBuilder; LineGobbler(final InputStream is, final Consumer consumer, final ExecutorService executor, final Map mdc) { - this(is, consumer, executor, mdc, "generic", MdcScope.DEFAULT); + this(is, consumer, executor, mdc, "generic", MdcScope.DEFAULT_BUILDER); } LineGobbler(final InputStream is, final Consumer consumer, final ExecutorService executor, final Map mdc, - final MdcScope mdcScope) { - this(is, consumer, executor, mdc, "generic", mdcScope); + final MdcScope.Builder mdcScopeBuilder) { + this(is, consumer, executor, mdc, "generic", mdcScopeBuilder); } LineGobbler(final InputStream is, @@ -63,13 +63,13 @@ public static void gobble(final InputStream is, final Consumer consumer, final ExecutorService executor, final Map mdc, final String caller, - final MdcScope mdcScope) { + final MdcScope.Builder mdcScopeBuilder) { this.is = IOs.newBufferedReader(is); this.consumer = consumer; this.executor = executor; this.mdc = mdc; this.caller = caller; - this.containerLogMDC = mdcScope; + this.containerLogMdcBuilder = mdcScopeBuilder; } @Override @@ -78,7 +78,7 @@ public void voidCall() { try { String line; while ((line = is.readLine()) != null) { - try (containerLogMDC) { + try (final var mdcScope = containerLogMdcBuilder.build()) { consumer.accept(line); } } diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java b/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java index 45e52d4495f18..b4c2f4c46f0e5 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/logging/MdcScope.java @@ -28,7 +28,7 @@ */ public class MdcScope implements AutoCloseable { - public final static MdcScope DEFAULT = new Builder().build(); + public final static MdcScope.Builder DEFAULT_BUILDER = new Builder(); private final Map originalContextMap; diff --git a/airbyte-commons/src/main/resources/log4j2.xml b/airbyte-commons/src/main/resources/log4j2.xml index c07bd80c4b319..f682d8017e876 100644 --- a/airbyte-commons/src/main/resources/log4j2.xml +++ b/airbyte-commons/src/main/resources/log4j2.xml @@ -7,7 +7,7 @@ %replace{%X{log_source} - }{^ - }{}%d{yyyy-MM-dd HH:mm:ss}{GMT+0} %p (%X{job_root}) %C{1}(%M):%L - %replace{%m}{apikey=[\w\-]*}{apikey=*****}%n - %d{yyyy-MM-dd HH:mm:ss} %-5p %replace{%m}{apikey=[\w\-]*}{apikey=*****}%n + %replace{%X{log_source} - }{^ - }{}%d{yyyy-MM-dd HH:mm:ss}{GMT+0} %p %C{1}(%M):%L - %replace{%m}{apikey=[\w\-]*}{apikey=*****}%n $${env:LOG_LEVEL:-INFO} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java b/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java index 249d5eae4a8c9..a7c37fb05250f 100644 --- 
a/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/DbtTransformationRunner.java @@ -31,10 +31,9 @@ public class DbtTransformationRunner implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(DbtTransformationRunner.class); private static final String DBT_ENTRYPOINT_SH = "entrypoint.sh"; - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("dbt") - .setPrefixColor(Color.CYAN) - .build(); + .setPrefixColor(Color.CYAN); private final ProcessFactory processFactory; private final NormalizationRunner normalizationRunner; @@ -93,9 +92,8 @@ public boolean transform(final String jobId, processFactory.create(jobId, attempt, jobRoot, dbtConfig.getDockerImage(), false, files, "/bin/bash", resourceRequirements, Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_JOB, KubeProcessFactory.SYNC_STEP, KubeProcessFactory.CUSTOM_STEP), dbtArguments); - - LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC); - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC); + LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC_BUILDER); + LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC_BUILDER); WorkerUtils.wait(process); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java index 39cbc52562dcc..3a287286c01b7 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java @@ -30,10 +30,9 @@ public class DefaultNormalizationRunner implements NormalizationRunner { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultNormalizationRunner.class); - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("normalization") - .setPrefixColor(Color.GREEN) - .build(); + .setPrefixColor(Color.GREEN); private final DestinationType destinationType; private final ProcessFactory processFactory; @@ -116,8 +115,8 @@ private boolean runProcess(final String jobId, process = processFactory.create(jobId, attempt, jobRoot, normalizationImageName, false, files, null, resourceRequirements, Map.of(KubeProcessFactory.JOB_TYPE, KubeProcessFactory.SYNC_JOB, KubeProcessFactory.SYNC_STEP, KubeProcessFactory.NORMALISE_STEP), args); - LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC); - LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC); + LineGobbler.gobble(process.getInputStream(), LOGGER::info, CONTAINER_LOG_MDC_BUILDER); + LineGobbler.gobble(process.getErrorStream(), LOGGER::error, CONTAINER_LOG_MDC_BUILDER); WorkerUtils.wait(process); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java index de4bacfcdc9b2..f05bdb288b505 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java +++ 
b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteDestination.java @@ -33,10 +33,9 @@ public class DefaultAirbyteDestination implements AirbyteDestination { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultAirbyteDestination.class); - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("destination") - .setPrefixColor(Color.MAGENTA) - .build(); + .setPrefixColor(Color.MAGENTA); private final IntegrationLauncher integrationLauncher; private final AirbyteStreamFactory streamFactory; @@ -48,7 +47,7 @@ public class DefaultAirbyteDestination implements AirbyteDestination { private Iterator messageIterator = null; public DefaultAirbyteDestination(final IntegrationLauncher integrationLauncher) { - this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC)); + this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER)); } @@ -70,7 +69,7 @@ public void start(final WorkerDestinationConfig destinationConfig, final Path jo WorkerConstants.DESTINATION_CATALOG_JSON_FILENAME, Jsons.serialize(destinationConfig.getCatalog())); // stdout logs are logged elsewhere since stdout also contains data - LineGobbler.gobble(destinationProcess.getErrorStream(), LOGGER::error, "airbyte-destination", CONTAINER_LOG_MDC); + LineGobbler.gobble(destinationProcess.getErrorStream(), LOGGER::error, "airbyte-destination", CONTAINER_LOG_MDC_BUILDER); writer = new BufferedWriter(new OutputStreamWriter(destinationProcess.getOutputStream(), Charsets.UTF_8)); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java index fadf68e44ef03..d24c567f1528a 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteSource.java @@ -38,10 +38,9 @@ public class DefaultAirbyteSource implements AirbyteSource { private static final Duration GRACEFUL_SHUTDOWN_DURATION = Duration.of(10, ChronoUnit.HOURS); private static final Duration FORCED_SHUTDOWN_DURATION = Duration.of(1, ChronoUnit.MINUTES); - private static final MdcScope CONTAINER_LOG_MDC = new Builder() + private static final MdcScope.Builder CONTAINER_LOG_MDC_BUILDER = new Builder() .setLogPrefix("source") - .setPrefixColor(Color.BLUE) - .build(); + .setPrefixColor(Color.BLUE); private final IntegrationLauncher integrationLauncher; private final AirbyteStreamFactory streamFactory; @@ -51,7 +50,7 @@ public class DefaultAirbyteSource implements AirbyteSource { private Iterator messageIterator = null; public DefaultAirbyteSource(final IntegrationLauncher integrationLauncher) { - this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC), new HeartbeatMonitor(HEARTBEAT_FRESH_DURATION)); + this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER), new HeartbeatMonitor(HEARTBEAT_FRESH_DURATION)); } @VisibleForTesting @@ -75,7 +74,7 @@ public void start(final WorkerSourceConfig sourceConfig, final Path jobRoot) thr sourceConfig.getState() == null ? null : WorkerConstants.INPUT_STATE_JSON_FILENAME, sourceConfig.getState() == null ? 
null : Jsons.serialize(sourceConfig.getState().getState())); // stdout logs are logged elsewhere since stdout also contains data - LineGobbler.gobble(sourceProcess.getErrorStream(), LOGGER::error, "airbyte-source", CONTAINER_LOG_MDC); + LineGobbler.gobble(sourceProcess.getErrorStream(), LOGGER::error, "airbyte-source", CONTAINER_LOG_MDC_BUILDER); messageIterator = streamFactory.create(IOs.newBufferedReader(sourceProcess.getInputStream())) .peek(message -> heartbeatMonitor.beat()) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java index e0c8c89e9da5b..4fb66377b9516 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactory.java @@ -7,7 +7,6 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.MdcScope; -import io.airbyte.commons.logging.MdcScope.Builder; import io.airbyte.protocol.models.AirbyteLogMessage; import io.airbyte.protocol.models.AirbyteMessage; import java.io.BufferedReader; @@ -30,22 +29,22 @@ public class DefaultAirbyteStreamFactory implements AirbyteStreamFactory { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultAirbyteStreamFactory.class); - private final MdcScope containerLogMDC; + private final MdcScope.Builder containerLogMdcBuilder; private final AirbyteProtocolPredicate protocolValidator; private final Logger logger; public DefaultAirbyteStreamFactory() { - this(new Builder().build()); + this(MdcScope.DEFAULT_BUILDER); } - public DefaultAirbyteStreamFactory(final MdcScope containerLogMDC) { - this(new AirbyteProtocolPredicate(), LOGGER, containerLogMDC); + public DefaultAirbyteStreamFactory(final MdcScope.Builder containerLogMdcBuilder) { + this(new AirbyteProtocolPredicate(), LOGGER, containerLogMdcBuilder); } - DefaultAirbyteStreamFactory(final AirbyteProtocolPredicate protocolPredicate, final Logger logger, final MdcScope containerLogMDC) { + DefaultAirbyteStreamFactory(final AirbyteProtocolPredicate protocolPredicate, final Logger logger, final MdcScope.Builder containerLogMdcBuilder) { protocolValidator = protocolPredicate; this.logger = logger; - this.containerLogMDC = containerLogMDC; + this.containerLogMdcBuilder = containerLogMdcBuilder; } @Override @@ -58,7 +57,7 @@ public Stream create(final BufferedReader bufferedReader) { // we log as info all the lines that are not valid json // some sources actually log their process on stdout, we // want to make sure this info is available in the logs. 
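The switch from a shared `MdcScope` instance to an `MdcScope.Builder` is the core of this fix: every log call now builds a short-lived scope that applies the container-specific MDC keys (log prefix, color) and restores the previous context when it closes, instead of reusing one `AutoCloseable` across calls and threads. A minimal, self-contained sketch of that pattern, written against plain SLF4J `MDC` rather than Airbyte's actual `MdcScope` class, could look like this:

```java
import java.util.HashMap;
import java.util.Map;
import org.slf4j.MDC;

// Sketch of the "build a scope per log call" pattern this commit introduces.
// Not the Airbyte MdcScope class itself; it only illustrates why a Builder is
// passed around instead of a single shared AutoCloseable scope.
public class MdcScopeSketch implements AutoCloseable {

  private final Map<String, String> previous;

  private MdcScopeSketch(final Map<String, String> keysToApply) {
    this.previous = MDC.getCopyOfContextMap(); // remember the caller's MDC
    keysToApply.forEach(MDC::put);             // apply container-specific keys (e.g. a log prefix)
  }

  @Override
  public void close() {
    // restore whatever MDC context was active before this scope was built
    if (previous != null) {
      MDC.setContextMap(previous);
    } else {
      MDC.clear();
    }
  }

  public static class Builder {

    private final Map<String, String> keys = new HashMap<>();

    public Builder put(final String key, final String value) {
      keys.put(key, value);
      return this;
    }

    // a fresh scope per call, mirroring containerLogMdcBuilder.build() in the hunks below
    public MdcScopeSketch build() {
      return new MdcScopeSketch(new HashMap<>(keys));
    }

  }

}
```

With this shape, `try (final var scope = builder.build()) { logger.info(line); }` tags only that single line with the container's MDC keys and leaves the surrounding thread context untouched, which is exactly what the hunks below change the stream factory to do.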
- try (containerLogMDC) { + try (final var mdcScope = containerLogMdcBuilder.build()) { logger.info(line); } } @@ -83,7 +82,7 @@ public Stream create(final BufferedReader bufferedReader) { .filter(airbyteMessage -> { final boolean isLog = airbyteMessage.getType() == AirbyteMessage.Type.LOG; if (isLog) { - try (containerLogMDC) { + try (final var mdcScope = containerLogMdcBuilder.build()) { internalLog(airbyteMessage.getLog()); } } diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java index 596f9e0f11f53..d990d719778b1 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/protocols/airbyte/DefaultAirbyteStreamFactoryTest.java @@ -121,7 +121,7 @@ public void testMissingNewLineBetweenValidRecords() { private Stream stringToMessageStream(final String inputString) { final InputStream inputStream = new ByteArrayInputStream(inputString.getBytes()); final BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); - return new DefaultAirbyteStreamFactory(protocolPredicate, logger, new Builder().build()).create(bufferedReader); + return new DefaultAirbyteStreamFactory(protocolPredicate, logger, new Builder()).create(bufferedReader); } } From 8ca187013f8d24132aedd94cfd1cd31f9d5c787e Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 4 Nov 2021 16:58:24 -0700 Subject: [PATCH 47/83] format (#7661) --- .../src/main/java/io/airbyte/oauth/BaseOAuthFlow.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index b0273aeba30da..dd80f158e110a 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -26,7 +26,6 @@ import java.util.UUID; import java.util.function.Function; import java.util.function.Supplier; - import org.apache.commons.lang3.RandomStringUtils; import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; @@ -146,10 +145,10 @@ protected String getState() { @Override public Map completeSourceOAuth( - final UUID workspaceId, - final UUID sourceDefinitionId, - final Map queryParams, - final String redirectUrl) + final UUID workspaceId, + final UUID sourceDefinitionId, + final Map queryParams, + final String redirectUrl) throws IOException, ConfigNotFoundException { final JsonNode oAuthParamConfig = getSourceOAuthParamConfig(workspaceId, sourceDefinitionId); return completeOAuthFlow( From 09fce77107f828a14cd002debcc95c90fe86d165 Mon Sep 17 00:00:00 2001 From: Lake Mossman Date: Thu, 4 Nov 2021 17:06:16 -0700 Subject: [PATCH 48/83] REDO: Generate seed connector specs on build (#7613) * add specs module with logic to fetch specs on build * format + build and add gradle dependency for new script * check seed file for existing specs + refactor * add tests + a bit more refactoring * run gw format * update yaml config persistence to merge specs into definitions * add comment * add dep * add tests for GcsBucketSpecFetcher * get rid of static block + format * DRY up parse call * add GCS details to comment * formatting + fix test * update comment * do not format seed specs files * change signature of run to allow cloud to reuse this 
script * run gw format * revert commits that change signature of run * fix comment typo Co-authored-by: Davin Chia * rename enum to be distinct from the enum in cloud * add missing dependencies between modules * add readme for seed connector spec generator * reword * reference readme in comment * ignore 'spec' field in newFields logic * rearrange dependencies so that CONNECTORS_BASE build does not depend on SeedConnectorSpecGenerator * run format * add some more helpful info to the GCS fetch failure message * add more info * get rid of unnecessary static block * Fix publishing docs (#7589) * Fix publishing docs * Reorder steps and add a comment about rebuilding the platform * Update README.md Co-authored-by: Lake Mossman * add dependency and rebuild * update PR template with seed connector generation steps * revert formatting changes to PR template * Update build.gradle * Remove unnecessary dep Co-authored-by: Davin Chia Co-authored-by: Christophe Duong --- .github/pull_request_template.md | 6 +- airbyte-config/init/build.gradle | 4 + .../java/io/airbyte/config/init/SeedType.java | 4 +- .../init}/YamlSeedConfigPersistence.java | 45 +- .../resources/seed/destination_specs.yaml | 2838 ++++++++ .../src/main/resources/seed/source_specs.yaml | 5924 +++++++++++++++++ .../init}/YamlSeedConfigPersistenceTest.java | 12 +- airbyte-config/models/build.gradle | 3 +- .../main/resources/types/DockerImageSpec.yaml | 16 + airbyte-config/persistence/build.gradle | 1 - .../DatabaseConfigPersistence.java | 9 +- .../BaseDatabaseConfigPersistenceTest.java | 50 +- ...DatabaseConfigPersistenceLoadDataTest.java | 13 +- airbyte-config/specs/README.md | 16 + airbyte-config/specs/build.gradle | 24 + .../config/specs/GcsBucketSpecFetcher.java | 70 + .../specs/SeedConnectorSpecGenerator.java | 127 + .../config/specs/SeedConnectorType.java | 33 + .../specs/GcsBucketSpecFetcherTest.java | 79 + .../specs/SeedConnectorSpecGeneratorTest.java | 154 + airbyte-scheduler/client/build.gradle | 1 + .../BucketSpecCacheSchedulerClient.java | 58 +- .../BucketSpecCacheSchedulerClientTest.java | 14 +- airbyte-server/build.gradle | 1 + .../java/io/airbyte/server/ServerApp.java | 2 +- .../server/handlers/ArchiveHandlerTest.java | 11 +- .../server/migration/RunMigrationTest.java | 2 +- build.gradle | 3 +- docs/connector-development/README.md | 19 +- settings.gradle | 13 +- 30 files changed, 9426 insertions(+), 126 deletions(-) rename airbyte-config/{persistence/src/main/java/io/airbyte/config/persistence => init/src/main/java/io/airbyte/config/init}/YamlSeedConfigPersistence.java (63%) create mode 100644 airbyte-config/init/src/main/resources/seed/destination_specs.yaml create mode 100644 airbyte-config/init/src/main/resources/seed/source_specs.yaml rename airbyte-config/{persistence/src/test/java/io/airbyte/config/persistence => init/src/test/java/io/airbyte/config/init}/YamlSeedConfigPersistenceTest.java (86%) create mode 100644 airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml create mode 100644 airbyte-config/specs/README.md create mode 100644 airbyte-config/specs/build.gradle create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java create mode 100644 airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java create mode 100644 
airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 87f7d9dc3564f..a2505b2a5949b 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -29,7 +29,6 @@ Expand the relevant checklist and delete the others. - [ ] `docs/integrations/README.md` - [ ] `airbyte-integrations/builds.md` - [ ] PR name follows [PR naming conventions](https://docs.airbyte.io/contributing-to-airbyte/updating-documentation#issues-and-pull-requests) -- [ ] Connector added to connector index like described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) #### Airbyter @@ -40,6 +39,8 @@ If this is a community PR, the Airbyte engineer reviewing this PR is responsible - [ ] Credentials added to Github CI. [Instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci). - [ ] [`/test connector=connectors/` command](https://docs.airbyte.io/connector-development#updating-an-existing-connector) is passing. - [ ] New Connector version released on Dockerhub by running the `/publish` command described [here](https://docs.airbyte.io/connector-development#updating-an-existing-connector) +- [ ] After the connector is published, connector added to connector index as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) +- [ ] Seed specs have been re-generated by building the platform and committing the changes to the seed spec files, as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector)

@@ -59,7 +60,6 @@ If this is a community PR, the Airbyte engineer reviewing this PR is responsible - [ ] Connector's `bootstrap.md`. See [description and examples](https://docs.google.com/document/d/1ypdgmwmEHWv-TrO4_YOQ7pAJGVrMp5BOkEVh831N260/edit?usp=sharing) - [ ] Changelog updated in `docs/integrations//.md` including changelog. See changelog [example](https://docs.airbyte.io/integrations/sources/stripe#changelog) - [ ] PR name follows [PR naming conventions](https://docs.airbyte.io/contributing-to-airbyte/updating-documentation#issues-and-pull-requests) -- [ ] Connector version bumped like described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) #### Airbyter @@ -70,6 +70,8 @@ If this is a community PR, the Airbyte engineer reviewing this PR is responsible - [ ] Credentials added to Github CI. [Instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci). - [ ] [`/test connector=connectors/` command](https://docs.airbyte.io/connector-development#updating-an-existing-connector) is passing. - [ ] New Connector version released on Dockerhub by running the `/publish` command described [here](https://docs.airbyte.io/connector-development#updating-an-existing-connector) +- [ ] After the new connector version is published, connector version bumped in the seed directory as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector) +- [ ] Seed specs have been re-generated by building the platform and committing the changes to the seed spec files, as described [here](https://docs.airbyte.io/connector-development#publishing-a-connector)

diff --git a/airbyte-config/init/build.gradle b/airbyte-config/init/build.gradle index 945051223ea9f..c7117fd16ea78 100644 --- a/airbyte-config/init/build.gradle +++ b/airbyte-config/init/build.gradle @@ -6,4 +6,8 @@ dependencies { implementation 'commons-cli:commons-cli:1.4' implementation project(':airbyte-config:models') + implementation project(':airbyte-config:persistence') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-commons-docker') + implementation project(':airbyte-json-validation') } diff --git a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java index 47c4c419bcf10..3730369621090 100644 --- a/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java +++ b/airbyte-config/init/src/main/java/io/airbyte/config/init/SeedType.java @@ -7,7 +7,9 @@ public enum SeedType { STANDARD_SOURCE_DEFINITION("/seed/source_definitions.yaml", "sourceDefinitionId"), - STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"); + STANDARD_DESTINATION_DEFINITION("/seed/destination_definitions.yaml", "destinationDefinitionId"), + SOURCE_SPEC("/seed/source_specs.yaml", "dockerImage"), + DESTINATION_SPEC("/seed/destination_specs.yaml", "dockerImage"); final String resourcePath; // ID field name diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java b/airbyte-config/init/src/main/java/io/airbyte/config/init/YamlSeedConfigPersistence.java similarity index 63% rename from airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java rename to airbyte-config/init/src/main/java/io/airbyte/config/init/YamlSeedConfigPersistence.java index 3bca71d57c2db..d218cfca89d4a 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/YamlSeedConfigPersistence.java +++ b/airbyte-config/init/src/main/java/io/airbyte/config/init/YamlSeedConfigPersistence.java @@ -2,23 +2,27 @@ * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
*/ -package io.airbyte.config.persistence; +package io.airbyte.config.init; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import com.google.common.io.Resources; +import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.MoreIterators; import io.airbyte.commons.yaml.Yamls; import io.airbyte.config.AirbyteConfig; import io.airbyte.config.ConfigSchema; -import io.airbyte.config.init.SeedType; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -45,11 +49,40 @@ public static YamlSeedConfigPersistence get(final Class seedDefinitionsResour return new YamlSeedConfigPersistence(seedDefinitionsResourceClass); } - private YamlSeedConfigPersistence(final Class seedDefinitionsResourceClass) throws IOException { + private YamlSeedConfigPersistence(final Class seedResourceClass) throws IOException { + final Map sourceDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_SOURCE_DEFINITION); + final Map sourceSpecConfigs = getConfigs(seedResourceClass, SeedType.SOURCE_SPEC); + final Map fullSourceDefinitionConfigs = sourceDefinitionConfigs.entrySet().stream() + .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), sourceSpecConfigs))); + + final Map destinationDefinitionConfigs = getConfigs(seedResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION); + final Map destinationSpecConfigs = getConfigs(seedResourceClass, SeedType.DESTINATION_SPEC); + final Map fullDestinationDefinitionConfigs = destinationDefinitionConfigs.entrySet().stream() + .collect(Collectors.toMap(Entry::getKey, e -> mergeSpecIntoDefinition(e.getValue(), destinationSpecConfigs))); + this.allSeedConfigs = ImmutableMap.>builder() - .put(SeedType.STANDARD_SOURCE_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_SOURCE_DEFINITION)) - .put(SeedType.STANDARD_DESTINATION_DEFINITION, getConfigs(seedDefinitionsResourceClass, SeedType.STANDARD_DESTINATION_DEFINITION)) - .build(); + .put(SeedType.STANDARD_SOURCE_DEFINITION, fullSourceDefinitionConfigs) + .put(SeedType.STANDARD_DESTINATION_DEFINITION, fullDestinationDefinitionConfigs).build(); + } + + /** + * Merges the corresponding spec JSON into the definition JSON. This is necessary because specs are + * stored in a separate resource file from definitions. 
+ * + * @param definitionJson JSON of connector definition that is missing a spec + * @param specConfigs map of docker image to JSON of docker image/connector spec pair + * @return JSON of connector definition including the connector spec + */ + private JsonNode mergeSpecIntoDefinition(final JsonNode definitionJson, final Map specConfigs) { + final String dockerImage = DockerUtils.getTaggedImageName( + definitionJson.get("dockerRepository").asText(), + definitionJson.get("dockerImageTag").asText()); + final JsonNode specConfigJson = specConfigs.get(dockerImage); + if (specConfigJson == null || specConfigJson.get("spec") == null) { + throw new UnsupportedOperationException(String.format("There is no seed spec for docker image %s", dockerImage)); + } + ((ObjectNode) definitionJson).set("spec", specConfigJson.get("spec")); + return definitionJson; } @SuppressWarnings("UnstableApiUsage") diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml new file mode 100644 index 0000000000000..a07a77ba31e3e --- /dev/null +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -0,0 +1,2838 @@ +# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator. +# Do NOT edit this file directly. See generator class for more details. +--- +- dockerImage: "airbyte/destination-azure-blob-storage:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/azureblobstorage" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "AzureBlobStorage Destination Spec" + type: "object" + required: + - "azure_blob_storage_account_name" + - "azure_blob_storage_account_key" + - "format" + additionalProperties: false + properties: + azure_blob_storage_endpoint_domain_name: + title: "Endpoint Domain Name" + type: "string" + default: "blob.core.windows.net" + description: "This is Azure Blob Storage endpoint domain name. Leave default\ + \ value (or leave it empty if run container from command line) to use\ + \ Microsoft native from example." + examples: + - "blob.core.windows.net" + azure_blob_storage_container_name: + title: "Azure blob storage container (Bucket) Name" + type: "string" + description: "The name of the Azure blob storage container. If not exists\ + \ - will be created automatically. May be empty, then will be created\ + \ automatically airbytecontainer+timestamp" + examples: + - "airbytetescontainername" + azure_blob_storage_account_name: + title: "Azure Blob Storage account name" + type: "string" + description: "The account's name of the Azure Blob Storage." + examples: + - "airbyte5storage" + azure_blob_storage_account_key: + description: "The Azure blob storage account key." + airbyte_secret: true + type: "string" + examples: + - "Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd==" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + const: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." 
+ default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + const: "JSONL" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-bigquery:0.5.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Destination Spec" + type: "object" + required: + - "project_id" + - "dataset_id" + additionalProperties: true + properties: + big_query_client_buffer_size_mb: + title: "Google BigQuery client chunk size" + description: "Google BigQuery client's chunk(buffer) size (MIN=1, MAX =\ + \ 15) for each table. The default 15MiB value is used if not set explicitly.\ + \ It's recommended to decrease value for big data sets migration for less\ + \ HEAP memory consumption and avoiding crashes. For more details refer\ + \ to https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html" + type: "integer" + minimum: 1 + maximum: 15 + default: 15 + examples: + - "15" + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "Default BigQuery Dataset ID tables are replicated to if the\ + \ source does not specify a namespace." + title: "Default Dataset ID" + dataset_location: + type: "string" + description: "The location of the dataset. Warning: Changes made after creation\ + \ will not be applied." + title: "Dataset Location" + default: "US" + enum: + - "US" + - "EU" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-southeast1" + - "asia-southeast2" + - "australia-southeast1" + - "europe-central1" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west5" + - "europe-west6" + - "northamerica-northeast1" + - "southamerica-east1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west-1" + - "us-west-2" + - "us-west-3" + - "us-west-4" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key. Default credentials will\ + \ be used if this field is left empty." + title: "Credentials JSON" + airbyte_secret: true + transformation_priority: + type: "string" + description: "When running custom transformations or Basic normalization,\ + \ running queries on interactive mode can hit BQ limits, choosing batch\ + \ will solve those limitss." + title: "Transformation Query Run Type" + default: "interactive" + enum: + - "interactive" + - "batch" + loading_method: + type: "object" + title: "Loading Method" + description: "Loading method used to send select the way data will be uploaded\ + \ to BigQuery." + oneOf: + - title: "Standard Inserts" + additionalProperties: false + description: "Direct uploading using streams." + required: + - "method" + properties: + method: + type: "string" + const: "Standard" + - title: "GCS Staging" + additionalProperties: false + description: "Writes large batches of records to a file, uploads the file\ + \ to GCS, then uses
COPY INTO table
to upload the file. Recommended\ + \ for large production workloads for better speed and scalability." + required: + - "method" + - "gcs_bucket_name" + - "gcs_bucket_path" + - "credential" + properties: + method: + type: "string" + const: "GCS Staging" + gcs_bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the GCS bucket." + examples: + - "airbyte_sync" + gcs_bucket_path: + description: "Directory under the GCS bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + keep_files_in_gcs-bucket: + type: "string" + description: "This upload method is supposed to temporary store records\ + \ in GCS bucket. What do you want to do with data in GCS bucket\ + \ when migration has finished?" + title: "GCS tmp files afterward processing" + default: "Delete all tmp files from GCS" + enum: + - "Delete all tmp files from GCS" + - "Keep all tmp files in GCS" + credential: + title: "Credential" + type: "object" + oneOf: + - title: "HMAC key" + required: + - "credential_type" + - "hmac_key_access_id" + - "hmac_key_secret" + properties: + credential_type: + type: "string" + const: "HMAC_KEY" + hmac_key_access_id: + type: "string" + description: "HMAC key access ID. When linked to a service account,\ + \ this ID is 61 characters long; when linked to a user account,\ + \ it is 24 characters long." + title: "HMAC Key Access ID" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234" + hmac_key_secret: + type: "string" + description: "The corresponding secret for the access ID. It\ + \ is a 40-character base-64 encoded string." + title: "HMAC Key Secret" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234567890ABCDEFGHIJ" + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.1.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Denormalized Typed Struct Destination Spec" + type: "object" + required: + - "project_id" + - "dataset_id" + additionalProperties: true + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "Default BigQuery Dataset ID tables are replicated to if the\ + \ source does not specify a namespace." + title: "Default Dataset ID" + dataset_location: + type: "string" + description: "The location of the dataset. Warning: Changes made after creation\ + \ will not be applied." + title: "Dataset Location" + default: "US" + enum: + - "US" + - "EU" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-southeast1" + - "asia-southeast2" + - "australia-southeast1" + - "europe-central1" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west5" + - "europe-west6" + - "northamerica-northeast1" + - "southamerica-east1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west-1" + - "us-west-2" + - "us-west-3" + - "us-west-4" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key. 
Default credentials will\ + \ be used if this field is left empty." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-keen:0.2.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/keen" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Keen Spec" + type: "object" + required: + - "project_id" + - "api_key" + additionalProperties: false + properties: + project_id: + description: "Keen Project ID" + type: "string" + examples: + - "58b4acc22ba938934e888322e" + api_key: + title: "API Key" + description: "Keen Master API key" + type: "string" + examples: + - "ABCDEFGHIJKLMNOPRSTUWXYZ" + airbyte_secret: true + infer_timestamp: + title: "Infer Timestamp" + description: "Allow connector to guess keen.timestamp value based on the\ + \ streamed data" + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-dynamodb:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "DynamoDB Destination Spec" + type: "object" + required: + - "dynamodb_table_name" + - "dynamodb_region" + - "access_key_id" + - "secret_access_key" + additionalProperties: false + properties: + dynamodb_endpoint: + title: "Endpoint" + type: "string" + default: "" + description: "This is your DynamoDB endpoint url.(if you are working with\ + \ AWS DynamoDB, just leave empty)." + examples: + - "http://localhost:9000" + dynamodb_table_name: + title: "DynamoDB Table Name" + type: "string" + description: "The name of the DynamoDB table." + examples: + - "airbyte_sync" + dynamodb_region: + title: "DynamoDB Region" + type: "string" + default: "" + description: "The region of the DynamoDB." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + - "us-gov-east-1" + - "us-gov-west-1" + access_key_id: + type: "string" + description: "The access key id to access the DynamoDB. Airbyte requires\ + \ Read and Write permissions to the DynamoDB." + title: "DynamoDB Key Id" + airbyte_secret: true + examples: + - "A012345678910EXAMPLE" + secret_access_key: + type: "string" + description: "The corresponding secret to the access key id." 
+ title: "DynamoDB Access Key" + airbyte_secret: true + examples: + - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-elasticsearch:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/elasticsearch" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Elasticsearch Connection Configuration" + type: "object" + required: + - "endpoint" + additionalProperties: false + properties: + endpoint: + title: "Server Endpoint" + type: "string" + description: "The full URL of the Elasticsearch server" + upsert: + type: "boolean" + title: "Upsert Records" + description: "If a primary key identifier is defined in the source, an upsert\ + \ will be performed using the primary key value as the elasticsearch doc\ + \ id. Does not support composite primary keys." + default: true + authenticationMethod: + title: "Authentication Method" + type: "object" + description: "The type of authentication to be used" + oneOf: + - title: "None" + additionalProperties: false + description: "No authentication will be used" + required: + - "method" + properties: + method: + type: "string" + const: "none" + - title: "Api Key/Secret" + additionalProperties: false + description: "Use an API key and secret combination to authenticate" + required: + - "method" + - "apiKeyId" + - "apiKeySecret" + properties: + method: + type: "string" + const: "secret" + apiKeyId: + title: "API Key ID" + description: "The Key ID used when accessing an enterprise Elasticsearch\ + \ instance." + type: "string" + apiKeySecret: + title: "API Key Secret" + description: "The secret associated with the API Key ID." + type: "string" + airbyte_secret: true + - title: "Username/Password" + additionalProperties: false + description: "Basic auth header with a username and password" + required: + - "method" + - "username" + - "password" + properties: + method: + type: "string" + const: "basic" + username: + title: "Username" + description: "Basic auth username to access a secure Elasticsearch\ + \ server" + type: "string" + password: + title: "Password" + description: "Basic auth password to access a secure Elasticsearch\ + \ server" + type: "string" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + supportsNamespaces: true +- dockerImage: "airbyte/destination-gcs:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "GCS Destination Spec" + type: "object" + required: + - "gcs_bucket_name" + - "gcs_bucket_path" + - "gcs_bucket_region" + - "credential" + - "format" + additionalProperties: false + properties: + gcs_bucket_name: + title: "GCS Bucket Name" + type: "string" + description: "The name of the GCS bucket." + examples: + - "airbyte_sync" + gcs_bucket_path: + description: "Directory under the GCS bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + gcs_bucket_region: + title: "GCS Bucket Region" + type: "string" + default: "" + description: "The region of the GCS bucket."
+ enum: + - "" + - "-- North America --" + - "northamerica-northeast1" + - "us-central1" + - "us-east1" + - "us-east4" + - "us-west1" + - "us-west2" + - "us-west3" + - "us-west4" + - "-- South America --" + - "southamerica-east1" + - "-- Europe --" + - "europe-central2" + - "europe-north1" + - "europe-west1" + - "europe-west2" + - "europe-west3" + - "europe-west4" + - "europe-west6" + - "-- Asia --" + - "asia-east1" + - "asia-east2" + - "asia-northeast1" + - "asia-northeast2" + - "asia-northeast3" + - "asia-south1" + - "asia-south2" + - "asia-southeast1" + - "asia-southeast2" + - "-- Australia --" + - "australia-southeast1" + - "australia-southeast2" + - "-- Multi-regions --" + - "asia" + - "eu" + - "us" + - "-- Dual-regions --" + - "asia1" + - "eur4" + - "nam4" + credential: + title: "Credential" + type: "object" + oneOf: + - title: "HMAC key" + required: + - "credential_type" + - "hmac_key_access_id" + - "hmac_key_secret" + properties: + credential_type: + type: "string" + enum: + - "HMAC_KEY" + default: "HMAC_KEY" + hmac_key_access_id: + type: "string" + description: "HMAC key access ID. When linked to a service account,\ + \ this ID is 61 characters long; when linked to a user account,\ + \ it is 24 characters long." + title: "HMAC Key Access ID" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234" + hmac_key_secret: + type: "string" + description: "The corresponding secret for the access ID. It is a\ + \ 40-character base-64 encoded string." + title: "HMAC Key Secret" + airbyte_secret: true + examples: + - "1234567890abcdefghij1234567890ABCDEFGHIJ" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "Avro: Apache Avro" + required: + - "format_type" + - "compression_codec" + properties: + format_type: + type: "string" + enum: + - "Avro" + default: "Avro" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data. Default\ + \ to no compression." + type: "object" + oneOf: + - title: "no compression" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "no compression" + default: "no compression" + - title: "Deflate" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "Deflate" + default: "Deflate" + compression_level: + title: "Deflate level" + description: "0: no compression & fastest, 9: best compression\ + \ & slowest." + type: "integer" + default: 0 + minimum: 0 + maximum: 9 + - title: "bzip2" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "bzip2" + default: "bzip2" + - title: "xz" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "xz" + default: "xz" + compression_level: + title: "Compression level" + description: "See here for details." + type: "integer" + default: 6 + minimum: 0 + maximum: 9 + - title: "zstandard" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "zstandard" + default: "zstandard" + compression_level: + title: "Compression level" + description: "Negative levels are 'fast' modes akin to lz4 or\ + \ snappy, levels above 9 are generally for archival purposes,\ + \ and levels above 18 use a lot of memory." + type: "integer" + default: 3 + minimum: -5 + maximum: 22 + include_checksum: + title: "Include checksum" + description: "If true, include a checksum with each data block." 
+ type: "boolean" + default: false + - title: "snappy" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "snappy" + default: "snappy" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ uploading bigger files and improve the speed, but consume\ + \ more memory. Allowed values: min=5MB, max=525MB. Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + enum: + - "CSV" + default: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." + default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ uploading bigger files and improve the speed, but consume\ + \ more memory. Allowed values: min=5MB, max=525MB. Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "JSONL" + default: "JSONL" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ uploading bigger files and improve the speed, but consume\ + \ more memory. Allowed values: min=5MB, max=525MB. Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "Parquet: Columnar Storage" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "Parquet" + default: "Parquet" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data pages." + type: "string" + enum: + - "UNCOMPRESSED" + - "SNAPPY" + - "GZIP" + - "LZO" + - "BROTLI" + - "LZ4" + - "ZSTD" + default: "UNCOMPRESSED" + block_size_mb: + title: "Block Size (Row Group Size) (MB)" + description: "This is the size of a row group being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will improve\ + \ the IO when reading, but consume more memory when writing. Default:\ + \ 128 MB." + type: "integer" + default: 128 + examples: + - 128 + max_padding_size_mb: + title: "Max Padding Size (MB)" + description: "Maximum size allowed as padding to align row groups.\ + \ This is also the minimum size of a row group. Default: 8 MB." + type: "integer" + default: 8 + examples: + - 8 + page_size_kb: + title: "Page Size (KB)" + description: "The page size is for compression. A block is composed\ + \ of pages. A page is the smallest unit that must be read fully\ + \ to access a single record. If this value is too small, the compression\ + \ will deteriorate. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_page_size_kb: + title: "Dictionary Page Size (KB)" + description: "There is one dictionary page per column per row group\ + \ when dictionary encoding is used.
The dictionary page size works\ + \ like the page size but for dictionary. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_encoding: + title: "Dictionary Encoding" + description: "Default: true." + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + $schema: "http://json-schema.org/draft-07/schema#" +- dockerImage: "airbyte/destination-pubsub:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/pubsub" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google PubSub Destination Spec" + type: "object" + required: + - "project_id" + - "topic_id" + - "credentials_json" + additionalProperties: true + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target PubSub" + title: "Project ID" + topic_id: + type: "string" + description: "PubSub topic ID in the given GCP project ID" + title: "PubSub Topic ID" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs if you need help generating this key." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-kafka:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/kafka" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Kafka Destination Spec" + type: "object" + required: + - "bootstrap_servers" + - "topic_pattern" + - "protocol" + - "acks" + - "enable_idempotence" + - "compression_type" + - "batch_size" + - "linger_ms" + - "max_in_flight_requests_per_connection" + - "client_dns_lookup" + - "buffer_memory" + - "max_request_size" + - "retries" + - "socket_connection_setup_timeout_ms" + - "socket_connection_setup_timeout_max_ms" + - "max_block_ms" + - "request_timeout_ms" + - "delivery_timeout_ms" + - "send_buffer_bytes" + - "receive_buffer_bytes" + additionalProperties: true + properties: + bootstrap_servers: + title: "Bootstrap servers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Kafka cluster. The client will make use of all servers\ + \ irrespective of which servers are specified here for bootstrapping—this\ + \ list only impacts the initial hosts used to discover the full set of\ + \ servers. This list should be in the form host1:port1,host2:port2,....\ + \ Since these servers are just used for the initial connection to discover\ + \ the full cluster membership (which may change dynamically), this list\ + \ need not contain the full set of servers (you may want more than one,\ + \ though, in case a server is down)." + type: "string" + examples: + - "kafka-broker1:9092,kafka-broker2:9092" + topic_pattern: + title: "Topic pattern" + description: "Topic pattern in which the records will be sent. You can use\ + \ patterns like '{namespace}' and/or '{stream}' to send the message to\ + \ a specific topic based on these values. Notice that the topic name will\ + \ be transformed to a standard naming convention." + type: "string" + examples: + - "sample.topic" + - "{namespace}.{stream}.sample" + test_topic: + title: "Test topic" + description: "Topic to test if Airbyte can produce messages." 
+ type: "string" + examples: + - "test.topic" + sync_producer: + title: "Sync producer" + description: "Wait synchronously until the record has been sent to Kafka." + type: "boolean" + default: false + protocol: + title: "Protocol" + type: "object" + description: "Protocol used to communicate with brokers." + oneOf: + - title: "PLAINTEXT" + required: + - "security_protocol" + properties: + security_protocol: + type: "string" + enum: + - "PLAINTEXT" + default: "PLAINTEXT" + - title: "SASL PLAINTEXT" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_PLAINTEXT" + default: "SASL_PLAINTEXT" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "PLAIN" + enum: + - "PLAIN" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + - title: "SASL SSL" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_SSL" + default: "SASL_SSL" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "GSSAPI" + enum: + - "GSSAPI" + - "OAUTHBEARER" + - "SCRAM-SHA-256" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + client_id: + title: "Client ID" + description: "An id string to pass to the server when making requests. The\ + \ purpose of this is to be able to track the source of requests beyond\ + \ just ip/port by allowing a logical application name to be included in\ + \ server-side request logging." + type: "string" + examples: + - "airbyte-producer" + acks: + title: "ACKs" + description: "The number of acknowledgments the producer requires the leader\ + \ to have received before considering a request complete. This controls\ + \ the durability of records that are sent." + type: "string" + default: "1" + enum: + - "0" + - "1" + - "all" + enable_idempotence: + title: "Enable idempotence" + description: "When set to 'true', the producer will ensure that exactly\ + \ one copy of each message is written in the stream. If 'false', producer\ + \ retries due to broker failures, etc., may write duplicates of the retried\ + \ message in the stream." + type: "boolean" + default: false + compression_type: + title: "Compression type" + description: "The compression type for all data generated by the producer." + type: "string" + default: "none" + enum: + - "none" + - "gzip" + - "snappy" + - "lz4" + - "zstd" + batch_size: + title: "Batch size" + description: "The producer will attempt to batch records together into fewer\ + \ requests whenever multiple records are being sent to the same partition." + type: "integer" + examples: + - 16384 + linger_ms: + title: "Linger ms" + description: "The producer groups together any records that arrive in between\ + \ request transmissions into a single batched request." 
+ type: "string" + examples: + - 0 + max_in_flight_requests_per_connection: + title: "Max in flight requests per connection" + description: "The maximum number of unacknowledged requests the client will\ + \ send on a single connection before blocking." + type: "integer" + examples: + - 5 + client_dns_lookup: + title: "Client DNS lookup" + description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ + \ connect to each returned IP address in sequence until a successful connection\ + \ is established. After a disconnection, the next IP is used. Once all\ + \ IPs have been used once, the client resolves the IP(s) from the hostname\ + \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ + \ bootstrap address into a list of canonical names. After the bootstrap\ + \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ + \ attempt to connect to the first IP address returned by the lookup, even\ + \ if the lookup returns multiple IP addresses." + type: "string" + default: "use_all_dns_ips" + enum: + - "default" + - "use_all_dns_ips" + - "resolve_canonical_bootstrap_servers_only" + buffer_memory: + title: "Buffer memory" + description: "The total bytes of memory the producer can use to buffer records\ + \ waiting to be sent to the server." + type: "string" + examples: + - 33554432 + max_request_size: + title: "Max request size" + description: "The maximum size of a request in bytes." + type: "integer" + examples: + - 1048576 + retries: + title: "Retries" + description: "Setting a value greater than zero will cause the client to\ + \ resend any record whose send fails with a potentially transient error." + type: "integer" + examples: + - 2147483647 + socket_connection_setup_timeout_ms: + title: "Socket connection setup timeout" + description: "The amount of time the client will wait for the socket connection\ + \ to be established." + type: "string" + examples: + - 10000 + socket_connection_setup_timeout_max_ms: + title: "Socket connection setup max timeout" + description: "The maximum amount of time the client will wait for the socket\ + \ connection to be established. The connection setup timeout will increase\ + \ exponentially for each consecutive connection failure up to this maximum." + type: "string" + examples: + - 30000 + max_block_ms: + title: "Max block ms" + description: "The configuration controls how long the KafkaProducer's send(),\ + \ partitionsFor(), initTransactions(), sendOffsetsToTransaction(), commitTransaction()\ + \ and abortTransaction() methods will block." + type: "string" + examples: + - 60000 + request_timeout_ms: + title: "Request timeout" + description: "The configuration controls the maximum amount of time the\ + \ client will wait for the response of a request. If the response is not\ + \ received before the timeout elapses the client will resend the request\ + \ if necessary or fail the request if retries are exhausted." + type: "integer" + examples: + - 30000 + delivery_timeout_ms: + title: "Delivery timeout" + description: "An upper bound on the time to report success or failure after\ + \ a call to 'send()' returns." + type: "integer" + examples: + - 120000 + send_buffer_bytes: + title: "Send buffer bytes" + description: "The size of the TCP send buffer (SO_SNDBUF) to use when sending\ + \ data. If the value is -1, the OS default will be used."
+ type: "integer" + examples: + - 131072 + receive_buffer_bytes: + title: "Receive buffer bytes" + description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ + \ reading data. If the value is -1, the OS default will be used." + type: "integer" + examples: + - 32768 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/destination-csv:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-csv" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "CSV Destination Spec" + type: "object" + required: + - "destination_path" + additionalProperties: false + properties: + destination_path: + description: "Path to the directory where csv files will be written. The\ + \ destination uses the local mount \"/local\" and any data files will\ + \ be placed inside that local mount. For more information check out our\ + \ docs" + type: "string" + examples: + - "/local" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-local-json:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-json" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Local Json Destination Spec" + type: "object" + required: + - "destination_path" + additionalProperties: false + properties: + destination_path: + description: "Path to the directory where json files will be written. The\ + \ files will be placed inside that local mount. For more information check\ + \ out our docs" + type: "string" + examples: + - "/json_data" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mssql:0.1.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MS SQL Server Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + - "schema" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 1433 + examples: + - "1433" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"public\"\ + ." + type: "string" + examples: + - "public" + default: "public" + order: 3 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 4 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 5 + ssl_method: + title: "SSL Method" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." 
+ required: + - "ssl_method" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Encrypted (trust server certificate)" + additionalProperties: false + description: "Use the cert provided by the server without verification.\ + \ (For testing purposes only!)" + required: + - "ssl_method" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "encrypted_trust_server_certificate" + default: "encrypted_trust_server_certificate" + - title: "Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "ssl_method" + - "trustStoreName" + - "trustStorePassword" + type: "object" + properties: + ssl_method: + type: "string" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + hostNameInCertificate: + title: "Host Name In Certificate" + type: "string" + description: "Specifies the host name of the server. The value of\ + \ this property must match the subject property of the certificate." + order: 7 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-meilisearch:0.2.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/meilisearch" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MeiliSearch Destination Spec" + type: "object" + required: + - "host" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the MeiliSearch instance" + type: "string" + order: 0 + api_key: + title: "API Key" + airbyte_secret: true + description: "MeiliSearch instance API Key" + type: "string" + order: 1 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mongodb:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mongodb" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MongoDB Destination Spec" + type: "object" + required: + - "database" + - "auth_type" + additionalProperties: true + properties: + instance_type: + description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ + \ Set, a TLS connection is used by default." + title: "MongoDb instance type" + type: "object" + order: 0 + oneOf: + - title: "Standalone MongoDb Instance" + required: + - "instance" + - "host" + - "port" + properties: + instance: + type: "string" + enum: + - "standalone" + default: "standalone" + host: + title: "Host" + type: "string" + description: "Host of a Mongo database to be replicated." + order: 0 + port: + title: "Port" + type: "integer" + description: "Port of a Mongo database to be replicated." + minimum: 0 + maximum: 65536 + default: 27017 + examples: + - "27017" + order: 1 + tls: + title: "TLS connection" + type: "boolean" + description: "Indicates whether TLS encryption protocol will be used\ + \ to connect to MongoDB. It is recommended to use a TLS connection\ + \ if possible. For more information see documentation." + default: false + order: 2 + - title: "Replica Set" + required: + - "instance" + - "server_addresses" + properties: + instance: + type: "string" + enum: + - "replica" + default: "replica" + server_addresses: + title: "Server addresses" + type: "string" + description: "The members of a replica set. Please specify `host`:`port`\ + \ of each member, separated by commas." + examples: + - "host1:27017,host2:27017,host3:27017" + order: 0 + replica_set: + title: "Replica Set" + type: "string" + description: "A replica set name." + order: 1 + - title: "MongoDB Atlas" + additionalProperties: false + required: + - "instance" + - "cluster_url" + properties: + instance: + type: "string" + enum: + - "atlas" + default: "atlas" + cluster_url: + title: "Cluster URL" + type: "string" + description: "URL of a cluster to connect to." + order: 0 + database: + title: "DB Name" + description: "Name of the database."
+ type: "string" + order: 2 + auth_type: + title: "Authorization type" + type: "object" + description: "Authorization type." + oneOf: + - title: "None" + additionalProperties: false + description: "None." + required: + - "authorization" + type: "object" + properties: + authorization: + type: "string" + const: "none" + - title: "Login/Password" + additionalProperties: false + description: "Login/Password." + required: + - "authorization" + - "username" + - "password" + type: "object" + properties: + authorization: + type: "string" + const: "login/password" + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 1 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 2 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-mysql:0.1.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mysql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MySQL Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 3306 + examples: + - "3306" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + order: 5 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." 
+ type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-oracle:0.1.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/oracle" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Oracle Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "sid" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 1521 + examples: + - "1521" + order: 1 + sid: + title: "SID" + description: "SID" + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database. This user must have\ + \ CREATE USER privileges in the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"airbyte\"\ + . In Oracle, schemas and users are the same thing, so the \"user\" parameter\ + \ is used as the login credentials and this is used for the default Airbyte\ + \ message schema." + type: "string" + examples: + - "airbyte" + default: "airbyte" + order: 5 + encryption: + title: "Encryption" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Native Network Encryption (NNE)" + additionalProperties: false + description: "Native network encryption gives you the ability to encrypt\ + \ database connections, without the configuration overhead of TCP/IP\ + \ and SSL/TLS and without the need to open and listen on different ports."
+ required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "client_nne" + enum: + - "client_nne" + default: "client_nne" + encryption_algorithm: + type: "string" + description: "This parameter defines the encryption algorithm to be\ + \ used" + title: "Encryption Algorithm" + default: "AES256" + enum: + - "AES256" + - "RC4_56" + - "3DES168" + - title: "TLS Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "encryption_method" + - "ssl_certificate" + properties: + encryption_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + ssl_certificate: + title: "SSL PEM file" + description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ + \ containers frequently used in certificate installations" + type: "string" + airbyte_secret: true + multiline: true + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: false + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-postgres:0.3.11" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Postgres Destination Spec" + type: "object" + required: + - "host" + - "port" + - "username" + - "database" + - "schema" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + schema: + title: "Default Schema" + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. The usual value for this field is \"public\"\ + ." + type: "string" + examples: + - "public" + default: "public" + order: 3 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 4 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 5 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: false + order: 6 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." 
+ type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-redshift:0.3.19" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Redshift Destination Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "password" + - "schema" + additionalProperties: true + properties: + host: + description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ + \ region and end with .redshift.amazonaws.com)" + type: "string" + title: "Host" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5439 + examples: + - "5439" + title: "Port" + username: + description: "Username to use to access the database." + type: "string" + title: "Username" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + database: + description: "Name of the database." + type: "string" + title: "Database" + schema: + description: "The default schema tables are written to if the source does\ + \ not specify a namespace. Unless specifically configured, the usual value\ + \ for this field is \"public\"." + type: "string" + examples: + - "public" + default: "public" + title: "Default Schema" + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the staging S3 bucket to use if utilising a COPY\ + \ strategy. COPY is recommended for production workloads for better speed\ + \ and scalability. See AWS docs for more details." + examples: + - "airbyte.staging" + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 staging bucket to use if utilising a\ + \ copy strategy." 
+ enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + access_key_id: + type: "string" + description: "The Access Key Id granting access to the above S3\ + \ staging bucket. Airbyte requires Read and Write permissions to the given\ + \ bucket." + title: "S3 Key Id" + airbyte_secret: true + secret_access_key: + type: "string" + description: "The corresponding secret to the above access key id." + title: "S3 Access Key" + airbyte_secret: true + part_size: + type: "integer" + minimum: 10 + maximum: 100 + examples: + - "10" + description: "Optional. Increase this if syncing tables larger than 100GB.\ + \ Only relevant for COPY. Files are streamed to S3 in parts. This determines\ + \ the size of each part, in MBs. As S3 has a limit of 10,000 parts per\ + \ file, part size affects the table size. This is 10MB by default, resulting\ + \ in a default limit of 100GB tables. Note, a larger part size will result\ + \ in larger memory requirements. A rule of thumb is to multiply the part\ + \ size by 10 to get the memory requirement. Modify this with care." + title: "Stream Part Size" + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/destination-s3:0.1.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "S3 Destination Spec" + type: "object" + required: + - "s3_bucket_name" + - "s3_bucket_path" + - "s3_bucket_region" + - "access_key_id" + - "secret_access_key" + - "format" + additionalProperties: false + properties: + s3_endpoint: + title: "Endpoint" + type: "string" + default: "" + description: "This is your S3 endpoint URL (if you are working with AWS\ + \ S3, just leave it empty)." + examples: + - "http://localhost:9000" + s3_bucket_name: + title: "S3 Bucket Name" + type: "string" + description: "The name of the S3 bucket." + examples: + - "airbyte_sync" + s3_bucket_path: + description: "Directory under the S3 bucket where data will be written." + type: "string" + examples: + - "data_sync/test" + s3_bucket_region: + title: "S3 Bucket Region" + type: "string" + default: "" + description: "The region of the S3 bucket." + enum: + - "" + - "us-east-1" + - "us-east-2" + - "us-west-1" + - "us-west-2" + - "af-south-1" + - "ap-east-1" + - "ap-south-1" + - "ap-northeast-1" + - "ap-northeast-2" + - "ap-northeast-3" + - "ap-southeast-1" + - "ap-southeast-2" + - "ca-central-1" + - "cn-north-1" + - "cn-northwest-1" + - "eu-central-1" + - "eu-north-1" + - "eu-south-1" + - "eu-west-1" + - "eu-west-2" + - "eu-west-3" + - "sa-east-1" + - "me-south-1" + - "us-gov-east-1" + - "us-gov-west-1" + access_key_id: + type: "string" + description: "The access key id to access the S3 bucket. Airbyte requires\ + \ Read and Write permissions to the given bucket." + title: "S3 Key Id" + airbyte_secret: true + examples: + - "A012345678910EXAMPLE" + secret_access_key: + type: "string" + description: "The corresponding secret to the access key id."
+ title: "S3 Access Key" + airbyte_secret: true + examples: + - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" + format: + title: "Output Format" + type: "object" + description: "Output data format" + oneOf: + - title: "Avro: Apache Avro" + required: + - "format_type" + - "compression_codec" + properties: + format_type: + type: "string" + enum: + - "Avro" + default: "Avro" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data. Default\ + \ to no compression." + type: "object" + oneOf: + - title: "no compression" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "no compression" + default: "no compression" + - title: "Deflate" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "Deflate" + default: "Deflate" + compression_level: + title: "Deflate level" + description: "0: no compression & fastest, 9: best compression\ + \ & slowest." + type: "integer" + default: 0 + minimum: 0 + maximum: 9 + - title: "bzip2" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "bzip2" + default: "bzip2" + - title: "xz" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "xz" + default: "xz" + compression_level: + title: "Compression level" + description: "See here for details." + type: "integer" + default: 6 + minimum: 0 + maximum: 9 + - title: "zstandard" + required: + - "codec" + - "compression_level" + properties: + codec: + type: "string" + enum: + - "zstandard" + default: "zstandard" + compression_level: + title: "Compression level" + description: "Negative levels are 'fast' modes akin to lz4 or\ + \ snappy, levels above 9 are generally for archival purposes,\ + \ and levels above 18 use a lot of memory." + type: "integer" + default: 3 + minimum: -5 + maximum: 22 + include_checksum: + title: "Include checksum" + description: "If true, include a checksum with each data block." + type: "boolean" + default: false + - title: "snappy" + required: + - "codec" + properties: + codec: + type: "string" + enum: + - "snappy" + default: "snappy" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ uploading bigger files and improve the speed, but consume\ + \ more memory. Allowed values: min=5MB, max=525MB. Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "CSV: Comma-Separated Values" + required: + - "format_type" + - "flattening" + properties: + format_type: + type: "string" + enum: + - "CSV" + default: "CSV" + flattening: + type: "string" + title: "Normalization (Flattening)" + description: "Whether the input json data should be normalized (flattened)\ + \ in the output CSV. Please refer to docs for details." + default: "No flattening" + enum: + - "No flattening" + - "Root level flattening" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ uploading bigger files and improve the speed, but consume\ + \ more memory. Allowed values: min=5MB, max=525MB. Default: 5MB."
+ type: "integer" + default: 5 + examples: + - 5 + - title: "JSON Lines: newline-delimited JSON" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "JSONL" + default: "JSONL" + part_size_mb: + title: "Block Size (MB) for Amazon S3 multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will allow\ + \ to upload a bigger files and improve the speed, but consumes9\ + \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 + - title: "Parquet: Columnar Storage" + required: + - "format_type" + properties: + format_type: + type: "string" + enum: + - "Parquet" + default: "Parquet" + compression_codec: + title: "Compression Codec" + description: "The compression algorithm used to compress data pages." + type: "string" + enum: + - "UNCOMPRESSED" + - "SNAPPY" + - "GZIP" + - "LZO" + - "BROTLI" + - "LZ4" + - "ZSTD" + default: "UNCOMPRESSED" + block_size_mb: + title: "Block Size (Row Group Size) (MB)" + description: "This is the size of a row group being buffered in memory.\ + \ It limits the memory usage when writing. Larger values will improve\ + \ the IO when reading, but consume more memory when writing. Default:\ + \ 128 MB." + type: "integer" + default: 128 + examples: + - 128 + max_padding_size_mb: + title: "Max Padding Size (MB)" + description: "Maximum size allowed as padding to align row groups.\ + \ This is also the minimum size of a row group. Default: 8 MB." + type: "integer" + default: 8 + examples: + - 8 + page_size_kb: + title: "Page Size (KB)" + description: "The page size is for compression. A block is composed\ + \ of pages. A page is the smallest unit that must be read fully\ + \ to access a single record. If this value is too small, the compression\ + \ will deteriorate. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_page_size_kb: + title: "Dictionary Page Size (KB)" + description: "There is one dictionary page per column per row group\ + \ when dictionary encoding is used. The dictionary page size works\ + \ like the page size but for dictionary. Default: 1024 KB." + type: "integer" + default: 1024 + examples: + - 1024 + dictionary_encoding: + title: "Dictionary Encoding" + description: "Default: true." + type: "boolean" + default: true + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" +- dockerImage: "airbyte/destination-snowflake:0.3.16" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snowflake Destination Spec" + type: "object" + required: + - "host" + - "role" + - "warehouse" + - "database" + - "schema" + - "username" + - "password" + additionalProperties: true + properties: + host: + description: "Host domain of the snowflake instance (must include the account,\ + \ region, cloud environment, and end with snowflakecomputing.com)." + examples: + - "accountname.us-east-2.aws.snowflakecomputing.com" + type: "string" + title: "Host" + order: 0 + role: + description: "The role you created for Airbyte to access Snowflake." + examples: + - "AIRBYTE_ROLE" + type: "string" + title: "Role" + order: 1 + warehouse: + description: "The warehouse you created for Airbyte to sync data into." 
+ examples:
+ - "AIRBYTE_WAREHOUSE"
+ type: "string"
+ title: "Warehouse"
+ order: 2
+ database:
+ description: "The database you created for Airbyte to sync data into."
+ examples:
+ - "AIRBYTE_DATABASE"
+ type: "string"
+ title: "Database"
+ order: 3
+ schema:
+ description: "The default Snowflake schema tables are written to if the\
+ \ source does not specify a namespace."
+ examples:
+ - "AIRBYTE_SCHEMA"
+ type: "string"
+ title: "Default Schema"
+ order: 4
+ username:
+ description: "The username you created to allow Airbyte to access the database."
+ examples:
+ - "AIRBYTE_USER"
+ type: "string"
+ title: "Username"
+ order: 5
+ password:
+ description: "Password associated with the username."
+ type: "string"
+ airbyte_secret: true
+ title: "Password"
+ order: 6
+ loading_method:
+ type: "object"
+ title: "Loading Method"
+ description: "Loading method used to send data to Snowflake."
+ order: 7
+ oneOf:
+ - title: "Standard Inserts"
+ additionalProperties: false
+ description: "Uses INSERT statements to send batches of records\
+ \ to Snowflake. Easiest (no setup) but not recommended for large production\
+ \ workloads due to slow speed."
+ required:
+ - "method"
+ properties:
+ method:
+ type: "string"
+ enum:
+ - "Standard"
+ default: "Standard"
+ - title: "AWS S3 Staging"
+ additionalProperties: false
+ description: "Writes large batches of records to a file, uploads the file\
+ \ to S3, then uses COPY INTO table to upload the file. Recommended\
+ \ for large production workloads for better speed and scalability."
+ required:
+ - "method"
+ - "s3_bucket_name"
+ - "access_key_id"
+ - "secret_access_key"
+ properties:
+ method:
+ type: "string"
+ enum:
+ - "S3 Staging"
+ default: "S3 Staging"
+ order: 0
+ s3_bucket_name:
+ title: "S3 Bucket Name"
+ type: "string"
+ description: "The name of the staging S3 bucket. Airbyte will write\
+ \ files to this bucket and read them via COPY statements\
+ \ on Snowflake."
+ examples:
+ - "airbyte.staging"
+ order: 1
+ s3_bucket_region:
+ title: "S3 Bucket Region"
+ type: "string"
+ default: ""
+ description: "The region of the S3 staging bucket to use if utilising\
+ \ a copy strategy."
+ enum:
+ - ""
+ - "us-east-1"
+ - "us-east-2"
+ - "us-west-1"
+ - "us-west-2"
+ - "af-south-1"
+ - "ap-east-1"
+ - "ap-south-1"
+ - "ap-northeast-1"
+ - "ap-northeast-2"
+ - "ap-northeast-3"
+ - "ap-southeast-1"
+ - "ap-southeast-2"
+ - "ca-central-1"
+ - "cn-north-1"
+ - "cn-northwest-1"
+ - "eu-central-1"
+ - "eu-west-1"
+ - "eu-west-2"
+ - "eu-west-3"
+ - "eu-south-1"
+ - "eu-north-1"
+ - "sa-east-1"
+ - "me-south-1"
+ order: 2
+ access_key_id:
+ type: "string"
+ description: "The Access Key Id granting allow one to access the above\
+ \ S3 staging bucket. Airbyte requires Read and Write permissions\
+ \ to the given bucket."
+ title: "S3 Key Id"
+ airbyte_secret: true
+ order: 3
+ secret_access_key:
+ type: "string"
+ description: "The corresponding secret to the above access key id."
+ title: "S3 Access Key"
+ airbyte_secret: true
+ order: 4
+ - title: "GCS Staging"
+ additionalProperties: false
+ description: "Writes large batches of records to a file, uploads the file\
+ \ to GCS, then uses COPY INTO table to upload the file. Recommended\
+ \ for large production workloads for better speed and scalability."
+ required:
+ - "method"
+ - "project_id"
+ - "bucket_name"
+ - "credentials_json"
+ properties:
+ method:
+ type: "string"
+ enum:
+ - "GCS Staging"
+ default: "GCS Staging"
+ order: 0
+ project_id:
+ title: "GCP Project ID"
+ type: "string"
+ description: "The name of the GCP project ID for your credentials."
+ examples:
+ - "my-project"
+ order: 1
+ bucket_name:
+ title: "GCS Bucket Name"
+ type: "string"
+ description: "The name of the staging GCS bucket. Airbyte will write\
+ \ files to this bucket and read them via COPY statements\
+ \ on Snowflake."
+ examples:
+ - "airbyte-staging"
+ order: 2
+ credentials_json:
+ title: "Google Application Credentials"
+ type: "string"
+ description: "The contents of the JSON key file that has read/write\
+ \ permissions to the staging GCS bucket. You will separately need\
+ \ to grant bucket access to your Snowflake GCP service account.\
+ \ See the GCP docs for more information on how to generate a JSON key\
+ \ for your service account."
+ airbyte_secret: true
+ multiline: true
+ order: 3
+ supportsIncremental: true
+ supportsNormalization: true
+ supportsDBT: true
+ supported_destination_sync_modes:
+ - "overwrite"
+ - "append"
+ - "append_dedup"
diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml
new file mode 100644
index 0000000000000..ced64ea39fcab
--- /dev/null
+++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml
@@ -0,0 +1,5924 @@
+# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator.
+# Do NOT edit this file directly. See generator class for more details.
+---
+- dockerImage: "airbyte/source-aws-cloudtrail:0.1.2"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/aws-cloudtrail"
+ connectionSpecification:
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "Aws CloudTrail Spec"
+ type: "object"
+ required:
+ - "aws_key_id"
+ - "aws_secret_key"
+ - "aws_region_name"
+ - "start_date"
+ additionalProperties: true
+ properties:
+ aws_key_id:
+ type: "string"
+ description: "Specifies an AWS access key associated with an IAM user or\
+ \ role."
+ airbyte_secret: true
+ aws_secret_key:
+ type: "string"
+ description: "Specifies the secret key associated with the access key. This\
+ \ is essentially the 'password' for the access key."
+ airbyte_secret: true
+ aws_region_name:
+ type: "string"
+ description: "The default AWS Region to use, for example, us-west-1 or us-west-2.\
+ \ When specifying a Region inline during client initialization, this property\
+ \ is named region_name."
+ start_date:
+ type: "string"
+ description: "The date you would like to replicate data. Data in ClouTraid\
+ \ is available for last 90 days only. Format: YYYY-MM-DD."
+ examples:
+ - "2021-01-01"
+ default: "1970-01-01"
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$"
+ supportsNormalization: false
+ supportsDBT: false
+ supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-amazon-ads:0.1.2"
+ spec:
+ documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-ads"
+ connectionSpecification:
+ title: "Amazon Ads Spec"
+ type: "object"
+ properties:
+ client_id:
+ title: "Client Id"
+ description: "Oauth client id How to create your Login with Amazon"
+ name: "Client ID"
+ type: "string"
+ client_secret:
+ title: "Client Secret"
+ description: "Oauth client secret How to create your Login with Amazon"
+ name: "Client secret"
+ airbyte_secret: true
+ type: "string"
+ scope:
+ title: "Scope"
+ description: "By default its advertising::campaign_management, but customers\
+ \ may need to set scope to cpc_advertising:campaign_management."
+ default: "advertising::campaign_management" + name: "Client scope" + examples: + - "cpc_advertising:campaign_management" + type: "string" + refresh_token: + title: "Refresh Token" + description: "Oauth 2.0 refresh_token, read details here" + name: "Oauth refresh token" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "Start date for collectiong reports, should not be more than\ + \ 60 days in past. In YYYY-MM-DD format" + name: "Start date" + examples: + - "2022-10-10" + - "2022-10-22" + type: "string" + region: + description: "Region to pull data from (EU/NA/FE/SANDBOX)" + default: "NA" + name: "Region" + title: "AmazonAdsRegion" + enum: + - "NA" + - "EU" + - "FE" + - "SANDBOX" + type: "string" + profiles: + title: "Profiles" + description: "profile Ids you want to fetch data for" + name: "Profile Ids" + type: "array" + items: + type: "integer" + required: + - "client_id" + - "client_secret" + - "refresh_token" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amazon-seller-partner:0.2.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" + changelogUrl: "https://docs.airbyte.io/integrations/sources/amazon-seller-partner" + connectionSpecification: + title: "Amazon Seller Partner Spec" + type: "object" + properties: + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + refresh_token: + title: "Refresh Token" + description: "The refresh token used obtained via authorization (can be\ + \ passed to the client instead)" + airbyte_secret: true + type: "string" + lwa_app_id: + title: "Lwa App Id" + description: "Your login with amazon app id" + airbyte_secret: true + type: "string" + lwa_client_secret: + title: "Lwa Client Secret" + description: "Your login with amazon client secret" + airbyte_secret: true + type: "string" + aws_access_key: + title: "Aws Access Key" + description: "AWS user access key" + airbyte_secret: true + type: "string" + aws_secret_key: + title: "Aws Secret Key" + description: "AWS user secret key" + airbyte_secret: true + type: "string" + role_arn: + title: "Role Arn" + description: "The role's arn (needs permission to 'Assume Role' STS)" + airbyte_secret: true + type: "string" + aws_environment: + title: "AWSEnvironment" + description: "An enumeration." + enum: + - "PRODUCTION" + - "SANDBOX" + type: "string" + region: + title: "AWSRegion" + description: "An enumeration." + enum: + - "AE" + - "DE" + - "PL" + - "EG" + - "ES" + - "FR" + - "IN" + - "IT" + - "NL" + - "SA" + - "SE" + - "TR" + - "UK" + - "AU" + - "JP" + - "SG" + - "US" + - "BR" + - "CA" + - "MX" + - "GB" + type: "string" + required: + - "replication_start_date" + - "refresh_token" + - "lwa_app_id" + - "lwa_client_secret" + - "aws_access_key" + - "aws_secret_key" + - "role_arn" + - "aws_environment" + - "region" + definitions: + AWSEnvironment: + title: "AWSEnvironment" + description: "An enumeration." + enum: + - "PRODUCTION" + - "SANDBOX" + type: "string" + AWSRegion: + title: "AWSRegion" + description: "An enumeration." 
+ enum: + - "AE" + - "DE" + - "PL" + - "EG" + - "ES" + - "FR" + - "IN" + - "IT" + - "NL" + - "SA" + - "SE" + - "TR" + - "UK" + - "AU" + - "JP" + - "SG" + - "US" + - "BR" + - "CA" + - "MX" + - "GB" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-amplitude:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/amplitude" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Amplitude Spec" + type: "object" + required: + - "api_key" + - "secret_key" + - "start_date" + additionalProperties: false + properties: + api_key: + type: "string" + description: "This is the project’s API key, used for calling Amplitude’\ + s APIs" + airbyte_secret: true + secret_key: + type: "string" + description: "This is the project's secret key, which is also used for calling\ + \ Amplitude’s APIs" + airbyte_secret: true + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-01-25T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-apify-dataset:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/apify-dataset" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Apify Dataset Spec" + type: "object" + required: + - "datasetId" + additionalProperties: false + properties: + datasetId: + type: "string" + description: "ID of the dataset you would like to load to Airbyte." + clean: + type: "boolean" + description: "If set to true, only clean items will be downloaded from the\ + \ dataset. See description of what clean means in Apify API docs. If not sure, set clean to false." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-appstore-singer:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/appstore" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Appstore Singer Spec" + type: "object" + required: + - "key_id" + - "private_key" + - "issuer_id" + - "vendor" + - "start_date" + additionalProperties: false + properties: + key_id: + type: "string" + description: "Key_id is the API key you use to connect to appstore's API." + private_key: + type: "string" + description: "Private_key is the contents of the key file you use to connect to appstore's API." + airbyte_secret: true + multiline: true + issuer_id: + type: "string" + description: "Issuer_id is used to generate the credentials to connect to appstore's\ + \ API." + vendor: + type: "string" + description: "This is the Apple ID of your account." + start_date: + type: "string" + description: "Date from which to start pulling data." 
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-asana:0.1.3" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Asana Spec" + type: "object" + additionalProperties: true + properties: + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate to Github" + type: "object" + oneOf: + - type: "object" + title: "Authenticate with Personal Access Token" + required: + - "personal_access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "PAT Credentials" + const: "PAT Credentials" + personal_access_token: + type: "string" + title: "Personal Access Token" + description: "Asana Personal Access Token (generate yours here)." + airbyte_secret: true + - type: "object" + title: "Authenticate via Asana (Oauth)" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "OAuth Credentials" + const: "OAuth Credentials" + client_id: + type: "string" + title: "" + description: "" + airbyte_secret: true + client_secret: + type: "string" + title: "" + description: "" + airbyte_secret: true + refresh_token: + type: "string" + title: "" + description: "" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "1" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-bamboo-hr:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bamboo-hr" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Bamboo HR Spec" + type: "object" + required: + - "subdomain" + - "api_key" + additionalProperties: false + properties: + subdomain: + type: "string" + description: "Sub Domain of bamboo hr" + api_key: + type: "string" + description: "Api key of bamboo hr" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-bigcommerce:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bigcommerce" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigCommerce Source CDK Specifications" + type: "object" + required: + - "start_date" + - "store_hash" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + store_hash: + type: "string" + description: "The hash code of the store. For https://api.bigcommerce.com/stores/HASH_CODE/v3/,\ + \ The store's hash code is 'HASH_CODE'." + access_token: + type: "string" + description: "The API Access Token." 
+ airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-bigquery:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/source/bigquery" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "BigQuery Source Spec" + type: "object" + required: + - "project_id" + - "credentials_json" + additionalProperties: false + properties: + project_id: + type: "string" + description: "The GCP project ID for the project containing the target BigQuery\ + \ dataset." + title: "Project ID" + dataset_id: + type: "string" + description: "The BigQuery Dataset ID to look for tables to replicate from." + title: "Default Dataset ID" + credentials_json: + type: "string" + description: "The contents of the JSON service account key. Check out the\ + \ docs\ + \ if you need help generating this key." + title: "Credentials JSON" + airbyte_secret: true + supportsIncremental: true + supportsNormalization: true + supportsDBT: true + supported_destination_sync_modes: [] + supported_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-bing-ads:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/bing-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Bing Ads Spec" + type: "object" + required: + - "accounts" + - "client_id" + - "client_secret" + - "customer_id" + - "developer_token" + - "refresh_token" + - "user_id" + - "reports_start_date" + - "hourly_reports" + - "daily_reports" + - "weekly_reports" + - "monthly_reports" + additionalProperties: false + properties: + accounts: + title: "Accounts" + type: "object" + description: "Account selection strategy." + oneOf: + - title: "All accounts assigned to your user" + additionalProperties: false + description: "Fetch data for all available accounts." + required: + - "selection_strategy" + properties: + selection_strategy: + type: "string" + enum: + - "all" + const: "all" + - title: "Subset of your accounts" + additionalProperties: false + description: "Fetch data for subset of account ids." + required: + - "ids" + - "selection_strategy" + properties: + selection_strategy: + type: "string" + enum: + - "subset" + const: "subset" + ids: + type: "array" + description: "List of accounts from which data will be fetched." + items: + type: "string" + minItems: 1 + uniqueItems: true + client_id: + type: "string" + description: "ID of your Microsoft Advertising client application." + airbyte_secret: true + client_secret: + type: "string" + description: "Secret of your Microsoft Advertising client application." + airbyte_secret: true + customer_id: + type: "string" + description: "User's customer ID." + developer_token: + type: "string" + description: "Developer token associated with user." + airbyte_secret: true + refresh_token: + type: "string" + description: "The long-lived Refresh token received via grant_type=refresh_token\ + \ request." + airbyte_secret: true + user_id: + type: "string" + description: "Unique user identifier." + reports_start_date: + type: "string" + format: "date" + default: "2020-01-01" + description: "From which date perform initial sync for report related streams.\ + \ In YYYY-MM-DD format" + hourly_reports: + title: "Hourly reports" + type: "boolean" + description: "The report data will be aggregated by each hour of the day." 
+ default: false + daily_reports: + title: "Daily reports" + type: "boolean" + description: "The report data will be aggregated by each day." + default: false + weekly_reports: + title: "Weekly reports" + type: "boolean" + description: "The report data will be aggregated by each week running from\ + \ Sunday through Saturday." + default: false + monthly_reports: + title: "Monthly reports" + type: "boolean" + description: "The report data will be aggregated by each month." + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-braintree:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/braintree" + connectionSpecification: + title: "Braintree Spec" + type: "object" + properties: + merchant_id: + title: "Merchant Id" + description: "Merchant ID is the unique identifier for entire gateway account." + name: "Merchant ID" + type: "string" + public_key: + title: "Public Key" + description: "This is your user-specific public identifier for Braintree." + name: "Public key" + type: "string" + private_key: + title: "Private Key" + description: "This is your user-specific private identifier." + name: "Private Key" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for Braintree\ + \ API for UTC timezone, All data generated after this date will be replicated." + name: "Start date" + examples: + - "2020" + - "2020-12-30" + - "2020-11-22 20:20:05" + type: "string" + format: "date-time" + environment: + description: "Environment specifies where the data will come from." + name: "Environment" + examples: + - "sandbox" + - "production" + - "qa" + - "development" + allOf: + - $ref: "#/definitions/Environment" + required: + - "merchant_id" + - "public_key" + - "private_key" + - "environment" + definitions: + Environment: + title: "Environment" + description: "An enumeration." + enum: + - "Development" + - "Sandbox" + - "Qa" + - "Production" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-cart:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/cart" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cart Spec" + type: "object" + required: + - "access_token" + - "start_date" + - "store_name" + additionalProperties: true + properties: + access_token: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." + store_name: + type: "string" + description: "Store name. All API URLs start with https://[mystorename.com]/api/v1/,\ + \ where [mystorename.com] is the domain name of your store." 
+ start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-01-01T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-chargebee:0.1.4" + spec: + documentationUrl: "https://apidocs.chargebee.com/docs/api" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Chargebee Spec" + type: "object" + required: + - "site" + - "site_api_key" + - "start_date" + - "product_catalog" + additionalProperties: false + properties: + site: + type: "string" + title: "Site" + description: "The site prefix for your Chargebee instance." + examples: + - "airbyte-test" + site_api_key: + type: "string" + title: "API Key" + description: "The API key from your Chargebee instance." + examples: + - "test_3yzfanAXF66USdWC9wQcM555DQJkSYoppu" + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2021-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-01-25T00:00:00Z" + product_catalog: + title: "Product Catalog" + type: "string" + description: "Product Catalog version of your Chargebee site. Instructions\ + \ on how to find your version you may find here under `API Version` section." + enum: + - "1.0" + - "2.0" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-clickhouse:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ClickHouse Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + description: "Host Endpoint of the Clickhouse Cluster" + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + database: + description: "Name of the database." + type: "string" + examples: + - "default" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." 
+ type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-close-com:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/close-com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Close.com Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Close.com API key (usually starts with 'api_'; find yours\ + \ here)." + airbyte_secret: true + start_date: + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-cockroachdb:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cockroach Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." 
+ type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "Connect using SSL" + description: "Encrypt client/server communications for increased security." + type: "boolean" + default: false + order: 5 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-delighted:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Delighted Spec" + type: "object" + required: + - "since" + - "api_key" + additionalProperties: false + properties: + since: + type: "integer" + description: "An Unix timestamp to retrieve records created on or after\ + \ this time." + examples: + - 1625328167 + api_key: + type: "string" + description: "A Delighted API key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-dixa:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/dixa" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Dixa Spec" + type: "object" + required: + - "api_token" + - "start_date" + additionalProperties: false + properties: + api_token: + type: "string" + description: "Dixa API token" + airbyte_secret: true + start_date: + type: "string" + description: "The connector pulls records updated from this date onwards." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "YYYY-MM-DD" + batch_size: + type: "integer" + description: "Number of days to batch into one request. Max 31." + pattern: "^[0-9]{1,2}$" + examples: + - 1 + - 31 + default: 31 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-drift:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/drift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Drift Spec" + type: "object" + required: + - "access_token" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Drift Access Token. See the docs for more information on how to generate this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-exchange-rates:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/exchangeratesapi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ratesapi.io Source Spec" + type: "object" + required: + - "start_date" + - "access_key" + additionalProperties: false + properties: + start_date: + type: "string" + description: "Start getting data from that date." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "YYYY-MM-DD" + access_key: + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." + airbyte_secret: true + base: + type: "string" + description: "ISO reference currency. See here. 
Free plan doesn't support Source Currency Switching, default\ + \ base currency is EUR" + examples: + - "EUR" + - "USD" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-facebook-marketing:0.2.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" + changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" + connectionSpecification: + title: "Source Facebook Marketing" + type: "object" + properties: + account_id: + title: "Account Id" + description: "The Facebook Ad account ID to use when pulling data from the\ + \ Facebook Marketing API." + type: "string" + access_token: + title: "Access Token" + description: "The value of the access token generated. See the docs\ + \ for more information" + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for AdCreatives\ + \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ after this date will be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + format: "date-time" + end_date: + title: "End Date" + description: "The date until which you'd like to replicate data for AdCreatives\ + \ and AdInsights APIs, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ between start_date and this date will be replicated. Not setting this\ + \ option will result in always syncing the latest data." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-26T00:00:00Z" + type: "string" + format: "date-time" + include_deleted: + title: "Include Deleted" + description: "Include data from deleted campaigns, ads, and adsets." + default: false + type: "boolean" + insights_lookback_window: + title: "Insights Lookback Window" + description: "The attribution window for the actions" + default: 28 + minimum: 0 + maximum: 28 + type: "integer" + insights_days_per_job: + title: "Insights Days Per Job" + description: "Number of days to sync in one job. The more data you have\ + \ - the smaller you want this parameter to be." 
+ default: 7 + minimum: 1 + maximum: 30 + type: "integer" + custom_insights: + title: "Custom Insights" + description: "A list wich contains insights entries, each entry must have\ + \ a name and can contains fields, breakdowns or action_breakdowns)" + type: "array" + items: + title: "InsightConfig" + type: "object" + properties: + name: + title: "Name" + description: "The name value of insight" + type: "string" + fields: + title: "Fields" + description: "A list of chosen fields for fields parameter" + default: [] + type: "array" + items: + type: "string" + breakdowns: + title: "Breakdowns" + description: "A list of chosen breakdowns for breakdowns" + default: [] + type: "array" + items: + type: "string" + action_breakdowns: + title: "Action Breakdowns" + description: "A list of chosen action_breakdowns for action_breakdowns" + default: [] + type: "array" + items: + type: "string" + required: + - "name" + required: + - "account_id" + - "access_token" + - "start_date" + definitions: + InsightConfig: + title: "InsightConfig" + type: "object" + properties: + name: + title: "Name" + description: "The name value of insight" + type: "string" + fields: + title: "Fields" + description: "A list of chosen fields for fields parameter" + default: [] + type: "array" + items: + type: "string" + breakdowns: + title: "Breakdowns" + description: "A list of chosen breakdowns for breakdowns" + default: [] + type: "array" + items: + type: "string" + action_breakdowns: + title: "Action Breakdowns" + description: "A list of chosen action_breakdowns for action_breakdowns" + default: [] + type: "array" + items: + type: "string" + required: + - "name" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-facebook-pages:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-pages" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Facebook Pages Spec" + type: "object" + required: + - "access_token" + - "page_id" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Facebook Page Access Token" + airbyte_secret: true + page_id: + type: "string" + description: "Page ID" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-file:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/file" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "File Source Spec" + type: "object" + additionalProperties: false + required: + - "dataset_name" + - "format" + - "url" + - "provider" + properties: + dataset_name: + type: "string" + description: "Name of the final table where to replicate this file (should\ + \ include only letters, numbers dash and underscores)" + format: + type: "string" + enum: + - "csv" + - "json" + - "jsonl" + - "excel" + - "feather" + - "parquet" + default: "csv" + description: "File Format of the file to be replicated (Warning: some format\ + \ may be experimental, please refer to docs)." 
+ reader_options: + type: "string" + description: "This should be a valid JSON string used by each reader/parser\ + \ to provide additional options and tune its behavior" + examples: + - "{}" + - "{'sep': ' '}" + url: + type: "string" + description: "URL path to access the file to be replicated" + provider: + type: "object" + description: "Storage Provider or Location of the file(s) to be replicated." + default: "Public Web" + oneOf: + - title: "HTTPS: Public Web" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "HTTPS" + default: "HTTPS" + - title: "GCS: Google Cloud Storage" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "GCS" + default: "GCS" + service_account_json: + type: "string" + description: "In order to access private Buckets stored on Google\ + \ Cloud, this connector would need a service account json credentials\ + \ with the proper permissions as described here. Please generate the credentials.json\ + \ file and copy/paste its content to this field (expecting JSON\ + \ formats). If accessing publicly available data, this field is\ + \ not necessary." + - title: "S3: Amazon Web Services" + required: + - "storage" + properties: + storage: + type: "string" + enum: + - "S3" + default: "S3" + aws_access_key_id: + type: "string" + description: "In order to access private Buckets stored on AWS S3,\ + \ this connector would need credentials with the proper permissions.\ + \ If accessing publicly available data, this field is not necessary." + aws_secret_access_key: + type: "string" + description: "In order to access private Buckets stored on AWS S3,\ + \ this connector would need credentials with the proper permissions.\ + \ If accessing publicly available data, this field is not necessary." + airbyte_secret: true + - title: "AzBlob: Azure Blob Storage" + required: + - "storage" + - "storage_account" + properties: + storage: + type: "string" + enum: + - "AzBlob" + default: "AzBlob" + storage_account: + type: "string" + description: "The globally unique name of the storage account that\ + \ the desired blob sits within. See here for more details." + sas_token: + type: "string" + description: "To access Azure Blob Storage, this connector would need\ + \ credentials with the proper permissions. One option is a SAS (Shared\ + \ Access Signature) token. If accessing publicly available data,\ + \ this field is not necessary." + airbyte_secret: true + shared_key: + type: "string" + description: "To access Azure Blob Storage, this connector would need\ + \ credentials with the proper permissions. One option is a storage\ + \ account shared key (aka account key or access key). If accessing\ + \ publicly available data, this field is not necessary." 
+ airbyte_secret: true + - title: "SSH: Secure Shell" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SSH" + default: "SSH" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "SCP: Secure copy protocol" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SCP" + default: "SCP" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "SFTP: Secure File Transfer Protocol" + required: + - "storage" + - "user" + - "host" + properties: + storage: + type: "string" + enum: + - "SFTP" + default: "SFTP" + user: + type: "string" + password: + type: "string" + airbyte_secret: true + host: + type: "string" + port: + type: "string" + default: "22" + - title: "Local Filesystem (limited)" + required: + - "storage" + properties: + storage: + type: "string" + description: "WARNING: Note that local storage URL available for read\ + \ must start with the local mount \"/local/\" at the moment until\ + \ we implement more advanced docker mounting options..." + enum: + - "local" + default: "local" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshdesk:0.2.7" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/freshdesk" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshdesk Spec" + type: "object" + required: + - "domain" + - "api_key" + additionalProperties: false + properties: + domain: + type: "string" + description: "Freshdesk domain" + examples: + - "myaccount.freshdesk.com" + pattern: + - "^[a-zA-Z0-9._-]*\\.freshdesk\\.com$" + api_key: + type: "string" + description: "Freshdesk API Key. See the docs for more information on how to obtain this key." + airbyte_secret: true + requests_per_minute: + title: "Requests per minute" + type: "integer" + description: "Number of requests per minute that this source allowed to\ + \ use." + start_date: + title: "Start date" + description: "Date from which to start pulling data." + format: "date-time" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2020-12-01T00:00:00Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshsales:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshsales Spec" + type: "object" + required: + - "domain_name" + - "api_key" + additionalProperties: false + properties: + domain_name: + type: "string" + description: "Freshsales domain" + examples: + - "mydomain.myfreshworks.com" + api_key: + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." 
+ airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-freshservice:0.1.0" + spec: + documentationUrl: "https://hub.docker.com/r/airbyte/source-freshservice" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Freshservice Spec" + type: "object" + required: + - "domain_name" + - "api_key" + - "start_date" + additionalProperties: false + properties: + domain_name: + type: "string" + description: "Freshservice domain" + examples: + - "mydomain.freshservice.com" + api_key: + title: "Api Key" + type: "string" + description: "Your API Access Key. See here. The key is case sensitive." + airbyte_secret: true + start_date: + title: "Replication Start Date" + type: "string" + description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2020-10-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-github:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/github" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Github Source Spec" + type: "object" + required: + - "start_date" + - "repository" + additionalProperties: true + properties: + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate to Github" + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Github (Oauth)" + required: + - "access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "OAuth Credentials" + const: "OAuth Credentials" + access_token: + type: "string" + title: "Access Token" + description: "Oauth access token" + airbyte_secret: true + - type: "object" + title: "Authenticate with Personal Access Token" + required: + - "personal_access_token" + properties: + option_title: + type: "string" + title: "Credentials title" + description: "PAT Credentials" + const: "PAT Credentials" + personal_access_token: + type: "string" + title: "Personal Access Tokens" + description: "Log into Github and then generate a personal access token. To load balance your API quota consumption\ + \ across multiple API tokens, input multiple tokens separated with\ + \ \",\"" + airbyte_secret: true + repository: + type: "string" + examples: + - "airbytehq/airbyte" + - "airbytehq/*" + title: "Github repositories" + description: "Space-delimited list of GitHub repositories/organizations,\ + \ e.g. `airbytehq/airbyte` for single repository and `airbytehq/*` for\ + \ get all repositories from organization" + start_date: + type: "string" + title: "Start date" + description: "The date from which you'd like to replicate data for GitHub\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated. Note that it will be used only in the following\ + \ incremental streams: comments, commits and issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + branch: + type: "string" + title: "Branch" + examples: + - "airbytehq/airbyte/master" + description: "Space-delimited list of GitHub repository branches to pull\ + \ commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified\ + \ for a repository, the default branch will be pulled." 
+ supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-gitlab:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/gitlab" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Gitlab Singer Spec" + type: "object" + required: + - "api_url" + - "private_token" + - "start_date" + additionalProperties: false + properties: + api_url: + type: "string" + examples: + - "gitlab.com" + description: "Please enter your basic URL from Gitlab instance" + private_token: + type: "string" + description: "Log into your Gitlab account and then generate a personal\ + \ Access Token." + airbyte_secret: true + groups: + type: "string" + examples: + - "airbyte.io" + description: "Space-delimited list of groups. e.g. airbyte.io" + projects: + type: "string" + examples: + - "airbyte.io/documentation" + description: "Space-delimited list of projects. e.g. airbyte.io/documentation\ + \ meltano/tap-gitlab" + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Gitlab\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-google-ads:0.1.15" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Ads Spec" + type: "object" + required: + - "credentials" + - "start_date" + - "customer_id" + additionalProperties: true + properties: + credentials: + type: "object" + title: "Google Credentials" + required: + - "developer_token" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + developer_token: + type: "string" + title: "Developer Token" + description: "Developer token granted by Google to use their APIs. More\ + \ instruction on how to find this value in our docs" + airbyte_secret: true + client_id: + type: "string" + title: "Client Id" + description: "Google client id. More instruction on how to find this\ + \ value in our docs" + client_secret: + type: "string" + title: "Client Secret" + description: "Google client secret. More instruction on how to find\ + \ this value in our docs" + airbyte_secret: true + access_token: + type: "string" + title: "Access Token" + description: "Access token generated using developer_token, oauth_client_id,\ + \ and oauth_client_secret. More instruction on how to find this value\ + \ in our docs" + airbyte_secret: true + refresh_token: + type: "string" + title: "Refresh Token" + description: "Refresh token generated using developer_token, oauth_client_id,\ + \ and oauth_client_secret. More instruction on how to find this value\ + \ in our docs" + airbyte_secret: true + customer_id: + title: "Customer Id" + type: "string" + description: "Customer id must be specified as a 10-digit number without\ + \ dashes. 
More instruction on how to find this value in our docs" + login_customer_id: + type: "string" + title: "Login Customer ID" + description: "If your access to the customer account is through a manager\ + \ account, this field is required and must be set to the customer ID of\ + \ the manager account (10-digit number without dashes). More information\ + \ about this field you can see here" + start_date: + type: "string" + title: "Start Date" + description: "UTC date and time in the format 2017-01-25. Any data before\ + \ this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + examples: + - "2017-01-25" + conversion_window_days: + title: "Conversion Window" + type: "integer" + description: "Define the historical replication lookback window in days" + minimum: 0 + maximum: 1095 + default: 14 + examples: + - 14 + custom_queries: + type: "array" + title: "Custom GAQL Queries" + items: + type: "object" + properties: + query: + type: "string" + title: "Custom query" + description: "A custom defined GAQL query for building the report.\ + \ Should not contain segments.date expression as it used by incremental\ + \ streams" + examples: + - "SELECT segments.ad_destination_type, campaign.advertising_channel_sub_type\ + \ FROM campaign WHERE campaign.status = 'PAUSED'" + table_name: + type: "string" + title: "Destination table name" + description: "The table name in your destination database for choosen\ + \ query." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "developer_token" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-analytics-v4:0.1.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-analytics-v4" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Analytics V4 Spec" + type: "object" + required: + - "view_id" + - "start_date" + additionalProperties: true + properties: + view_id: + type: "string" + title: "View ID" + description: "The ID for the Google Analytics View you want to fetch data\ + \ from. This can be found from the Google Analytics Account Explorer." + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + description: "A date in the format YYYY-MM-DD." + examples: + - "2020-06-01" + window_in_days: + type: "integer" + description: "The amount of days for each data-chunk begining from start_date.\ + \ Bigger the value - faster the fetch. (Min=1, as for a Day; Max=364,\ + \ as for a Year)." + examples: + - 30 + - 60 + - 90 + - 120 + - 200 + - 364 + default: 90 + custom_reports: + title: "Custom Reports" + type: "string" + description: "A JSON array describing the custom reports you want to sync\ + \ from GA. Check out the docs to get more information about this field." 
+ credentials: + type: "object" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "A access token generated using the above client ID,\ + \ secret and refresh_token" + airbyte_secret: true + - type: "object" + title: "Service Account Key Authentication" + required: + - "credentials_json" + properties: + auth_type: + type: "string" + const: "Service" + enum: + - "Service" + default: "Service" + order: 0 + credentials_json: + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-directory:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-directory" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Directory Spec" + type: "object" + required: + - "credentials_json" + - "email" + additionalProperties: false + properties: + credentials_json: + type: "string" + description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + description: "The email of the user, which has permissions to access the\ + \ Google Workspace Admin APIs." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-google-search-console:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Search Console Spec" + type: "object" + additionalProperties: false + required: + - "site_urls" + - "start_date" + - "authorization" + properties: + site_urls: + type: "array" + items: + type: "string" + description: "Website URLs property; do not include the domain-level property\ + \ in the list" + examples: + - "https://example1.com" + - "https://example2.com" + start_date: + type: "string" + description: "The date from which you'd like to replicate data in the format\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + end_date: + type: "string" + description: "The date from which you'd like to replicate data in the format\ + \ YYYY-MM-DD. 
Must be greater or equal start_date field" + examples: + - "2021-12-12" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + authorization: + type: "object" + title: "Authentication Type" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "Service Account Key Authentication" + required: + - "auth_type" + - "service_account_info" + - "email" + properties: + auth_type: + type: "string" + const: "Service" + enum: + - "Service" + default: "Service" + order: 0 + service_account_info: + title: "Service Account JSON Key" + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + email: + title: "Admin Email" + type: "string" + description: "The email of the user which has permissions to access\ + \ the Google Workspace Admin APIs." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "authorization" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-google-sheets:0.2.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-sheets" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Stripe Source Spec" + type: "object" + required: + - "spreadsheet_id" + additionalProperties: true + properties: + spreadsheet_id: + type: "string" + description: "The ID of the spreadsheet to be replicated." 
+ credentials: + type: "object" + oneOf: + - title: "Authenticate via Google (Oauth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - title: "Service Account Key Authentication" + type: "object" + required: + - "auth_type" + - "service_account_info" + properties: + auth_type: + type: "string" + const: "Service" + service_account_info: + type: "string" + description: "The JSON key of the service account to use for authorization" + examples: + - "{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID,\ + \ \"private_key_id\": YOUR_PRIVATE_KEY, ... }" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - 0 + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-google-workspace-admin-reports:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/google-workspace-admin-reports" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Google Directory Spec" + type: "object" + required: + - "credentials_json" + - "email" + additionalProperties: false + properties: + credentials_json: + type: "string" + description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + description: "The email of the user, which has permissions to access the\ + \ Google Workspace Admin APIs." + lookback: + type: "integer" + minimum: 0 + maximum: 180 + description: "Sets the range of time shown in the report. Reports API allows\ + \ from up to 180 days ago. " + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-greenhouse:0.2.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/greenhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Greenhouse Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Greenhouse API Key. See the docs for more information on how to generate this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-harvest:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/harvest" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Harvest Spec" + type: "object" + required: + - "api_token" + - "account_id" + - "replication_start_date" + additionalProperties: false + properties: + api_token: + title: "API Token" + description: "Harvest API Token." + airbyte_secret: true + type: "string" + account_id: + title: "Account ID" + description: "Harvest account ID. 
Required for all Harvest requests in pair\ + \ with API Key" + airbyte_secret: true + type: "string" + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-hubspot:0.1.22" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Hubspot Source Spec" + type: "object" + required: + - "start_date" + - "credentials" + additionalProperties: false + properties: + start_date: + type: "string" + title: "Replication start date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + credentials: + title: "Authentication mechanism" + description: "Choose either to provide the API key or the OAuth2.0 credentials" + type: "object" + oneOf: + - type: "object" + title: "Authenticate via Hubspot (Oauth)" + required: + - "redirect_uri" + - "client_id" + - "client_secret" + - "refresh_token" + - "access_token" + - "credentials_title" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Name of the credentials set" + const: "OAuth Credentials" + enum: + - "OAuth Credentials" + default: "OAuth Credentials" + order: 0 + client_id: + title: "Client ID" + description: "Hubspot client_id. See our docs if you need help finding this id." + type: "string" + examples: + - "123456789000" + client_secret: + title: "Client Secret" + description: "Hubspot client_secret. See our docs if you need help finding this secret." + type: "string" + examples: + - "secret" + airbyte_secret: true + refresh_token: + title: "Refresh token" + description: "Hubspot refresh_token. See our docs if you need help generating the token." + type: "string" + examples: + - "refresh_token" + airbyte_secret: true + - type: "object" + title: "API key" + required: + - "api_key" + - "credentials_title" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Name of the credentials set" + const: "API Key Credentials" + enum: + - "API Key Credentials" + default: "API Key Credentials" + order: 0 + api_key: + title: "API key" + description: "Hubspot API Key. See our docs if you need help finding this key." 
+ type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "refresh_token" + oauthFlowOutputParameters: + - - "refresh_token" +- dockerImage: "airbyte/source-db2:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/db2" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "IBM Db2 Source Spec" + type: "object" + required: + - "host" + - "port" + - "db" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host of the Db2." + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + db: + description: "Name of the database." + type: "string" + examples: + - "default" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-instagram:0.1.9" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/instagram" + changelogUrl: "https://docs.airbyte.io/integrations/sources/instagram" + connectionSpecification: + title: "Source Instagram" + type: "object" + properties: + start_date: + title: "Start Date" + description: "The date from which you'd like to replicate data for User\ + \ Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after\ + \ this date will be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + format: "date-time" + access_token: + title: "Access Token" + description: "The value of the access token generated. See the docs for\ + \ more information" + airbyte_secret: true + type: "string" + required: + - "start_date" + - "access_token" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-intercom:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/intercom" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Intercom Spec" + type: "object" + required: + - "access_token" + - "start_date" + additionalProperties: false + properties: + access_token: + type: "string" + description: "Intercom Access Token. See the docs for more information on how to obtain this key." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Intercom\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." 
+ examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-iterable:0.1.10" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/iterable" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Iterable Spec" + type: "object" + required: + - "start_date" + - "api_key" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Iterable,\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated." + examples: + - "2021-04-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + api_key: + type: "string" + description: "Iterable API Key. See the docs for more information on how to obtain this key." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-jira:0.2.14" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/jira" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Jira Spec" + type: "object" + required: + - "api_token" + - "domain" + - "email" + additionalProperties: true + properties: + api_token: + type: "string" + description: "Jira API Token. See the docs for more information on how to generate this key." + airbyte_secret: true + domain: + type: "string" + examples: + - "domainname.atlassian.net" + pattern: "^[a-zA-Z0-9._-]*\\.atlassian\\.net$" + description: "Domain for your Jira account, e.g. airbyteio.atlassian.net" + email: + type: "string" + description: "The user email for your Jira account" + projects: + type: "array" + title: "Projects" + items: + type: "string" + examples: + - "PROJ1" + - "PROJ2" + description: "Comma-separated list of Jira project keys to replicate data\ + \ for" + start_date: + type: "string" + title: "Start Date" + description: "The date from which you'd like to replicate data for Jira\ + \ in the format YYYY-MM-DDT00:00:00Z. All data generated after this date\ + \ will be replicated. Note that it will be used only in the following\ + \ incremental streams: issues." + examples: + - "2021-03-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + additional_fields: + type: "array" + title: "Additional Fields" + items: + type: "string" + description: "Comma-separated list of additional fields to include in replicating\ + \ issues" + examples: + - "Field A" + - "Field B" + expand_issue_changelog: + type: "boolean" + title: "Expand Issue Changelog" + description: "Expand the changelog when replicating issues" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-kafka:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/kafka" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Kafka Source Spec" + type: "object" + required: + - "bootstrap_servers" + - "subscription" + - "protocol" + additionalProperties: false + properties: + bootstrap_servers: + title: "Bootstrap servers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Kafka cluster. 
The client will make use of all servers\ + \ irrespective of which servers are specified here for bootstrapping—this\ + \ list only impacts the initial hosts used to discover the full set of\ + \ servers. This list should be in the form host1:port1,host2:port2,....\ + \ Since these servers are just used for the initial connection to discover\ + \ the full cluster membership (which may change dynamically), this list\ + \ need not contain the full set of servers (you may want more than one,\ + \ though, in case a server is down)." + type: "string" + examples: + - "kafka-broker1:9092,kafka-broker2:9092" + subscription: + title: "Subscribe method" + type: "object" + description: "You can choose to manually assign a list of partitions, or\ + \ subscribe to all topics matching specified pattern to get dynamically\ + \ assigned partitions" + oneOf: + - title: "Manually assign a list of partitions" + required: + - "subscription_type" + - "topic_partitions" + properties: + subscription_type: + description: "Manually assign a list of partitions to this consumer.\ + \ This interface does not allow for incremental assignment and will\ + \ replace the previous assignment (if there is one).\nIf the given\ + \ list of topic partitions is empty, it is treated the same as unsubscribe()." + type: "string" + const: "assign" + enum: + - "assign" + default: "assign" + topic_partitions: + title: "List of topic:partition pairs" + type: "string" + examples: + - "sample.topic:0, sample.topic:1" + - title: "Subscribe to all topics matching specified pattern" + required: + - "subscription_type" + - "topic_pattern" + properties: + subscription_type: + description: "Topic pattern from which the records will be read." + type: "string" + const: "subscribe" + enum: + - "subscribe" + default: "subscribe" + topic_pattern: + title: "Topic pattern" + type: "string" + examples: + - "sample.topic" + test_topic: + title: "Test topic" + description: "Topic to test if Airbyte can consume messages." + type: "string" + examples: + - "test.topic" + group_id: + title: "Group ID" + description: "Group id." + type: "string" + examples: + - "group.id" + max_poll_records: + title: "Max poll records" + description: "The maximum number of records returned in a single call to\ + \ poll(). Note, that max_poll_records does not impact the underlying fetching\ + \ behavior. The consumer will cache the records from each fetch request\ + \ and returns them incrementally from each poll." + type: "integer" + default: 500 + protocol: + title: "Protocol" + type: "object" + description: "Protocol used to communicate with brokers." + oneOf: + - title: "PLAINTEXT" + required: + - "security_protocol" + properties: + security_protocol: + type: "string" + enum: + - "PLAINTEXT" + default: "PLAINTEXT" + - title: "SASL PLAINTEXT" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_PLAINTEXT" + default: "SASL_PLAINTEXT" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "PLAIN" + enum: + - "PLAIN" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." 
+ type: "string" + default: "" + airbyte_secret: true + - title: "SASL SSL" + required: + - "security_protocol" + - "sasl_mechanism" + - "sasl_jaas_config" + properties: + security_protocol: + type: "string" + enum: + - "SASL_SSL" + default: "SASL_SSL" + sasl_mechanism: + title: "SASL mechanism" + description: "SASL mechanism used for client connections. This may\ + \ be any mechanism for which a security provider is available." + type: "string" + default: "GSSAPI" + enum: + - "GSSAPI" + - "OAUTHBEARER" + - "SCRAM-SHA-256" + sasl_jaas_config: + title: "SASL JAAS config" + description: "JAAS login context parameters for SASL connections in\ + \ the format used by JAAS configuration files." + type: "string" + default: "" + airbyte_secret: true + client_id: + title: "Client ID" + description: "An id string to pass to the server when making requests. The\ + \ purpose of this is to be able to track the source of requests beyond\ + \ just ip/port by allowing a logical application name to be included in\ + \ server-side request logging." + type: "string" + examples: + - "airbyte-consumer" + enable_auto_commit: + title: "Enable auto commit" + description: "If true the consumer's offset will be periodically committed\ + \ in the background." + type: "boolean" + default: true + auto_commit_interval_ms: + title: "Auto commit interval ms" + description: "The frequency in milliseconds that the consumer offsets are\ + \ auto-committed to Kafka if enable.auto.commit is set to true." + type: "integer" + default: 5000 + client_dns_lookup: + title: "Client DNS lookup" + description: "Controls how the client uses DNS lookups. If set to use_all_dns_ips,\ + \ connect to each returned IP address in sequence until a successful connection\ + \ is established. After a disconnection, the next IP is used. Once all\ + \ IPs have been used once, the client resolves the IP(s) from the hostname\ + \ again. If set to resolve_canonical_bootstrap_servers_only, resolve each\ + \ bootstrap address into a list of canonical names. After the bootstrap\ + \ phase, this behaves the same as use_all_dns_ips. If set to default (deprecated),\ + \ attempt to connect to the first IP address returned by the lookup, even\ + \ if the lookup returns multiple IP addresses." + type: "string" + default: "use_all_dns_ips" + enum: + - "default" + - "use_all_dns_ips" + - "resolve_canonical_bootstrap_servers_only" + retry_backoff_ms: + title: "Retry backoff ms" + description: "The amount of time to wait before attempting to retry a failed\ + \ request to a given topic partition. This avoids repeatedly sending requests\ + \ in a tight loop under some failure scenarios." + type: "integer" + default: 100 + request_timeout_ms: + title: "Request timeout ms" + description: "The configuration controls the maximum amount of time the\ + \ client will wait for the response of a request. If the response is not\ + \ received before the timeout elapses the client will resend the request\ + \ if necessary or fail the request if retries are exhausted." + type: "integer" + default: 30000 + receive_buffer_bytes: + title: "Receive buffer bytes" + description: "The size of the TCP receive buffer (SO_RCVBUF) to use when\ + \ reading data. If the value is -1, the OS default will be used." 
+ type: "integer" + default: 32768 + auto_offset_reset: + title: "Auto offset reset" + description: "What to do when there is no initial offset in Kafka or if\ + \ the current offset does not exist any more on the server - earliest:\ + \ automatically reset the offset to the earliest offset, latest: automatically\ + \ reset the offset to the latest offset, none: throw exception to the\ + \ consumer if no previous offset is found for the consumer's group, anything\ + \ else: throw exception to the consumer." + type: "string" + default: "latest" + enum: + - "latest" + - "earliest" + - "none" + repeated_calls: + title: "Repeated calls" + description: "The number of repeated calls to poll() if no messages were\ + \ received." + type: "integer" + default: 3 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + supported_source_sync_modes: + - "append" +- dockerImage: "airbyte/source-klaviyo:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" + changelogUrl: "https://docs.airbyte.io/integrations/sources/klaviyo" + connectionSpecification: + title: "Klaviyo Spec" + type: "object" + properties: + api_key: + title: "Api Key" + description: "Klaviyo API Key. See our docs if you need help finding this key." + airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + required: + - "api_key" + - "start_date" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-lever-hiring:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring" + changelogUrl: "https://docs.airbyte.io/integrations/sources/lever-hiring#changelog" + connectionSpecification: + title: "Lever Hiring Spec" + type: "object" + properties: + client_id: + title: "Client Id" + description: "The client application id as provided when registering the\ + \ application with Lever." + type: "string" + client_secret: + title: "Client Secret" + description: "The application secret as provided when registering the application\ + \ with Lever." + airbyte_secret: true + type: "string" + refresh_token: + title: "Refresh Token" + description: "The refresh token your application will need to submit to\ + \ get a new access token after it's expired." + type: "string" + environment: + title: "Environment" + description: "Sandbox or Production environment." + default: "Production" + enum: + - "Sandbox" + - "Production" + type: "string" + start_date: + title: "Start Date" + description: "UTC date and time in the format 2019-02-25T00:00:00Z. Any\ + \ data before this date will not be replicated." 
+ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-04-25T00:00:00Z" + type: "string" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + - - "refresh_token" + oauthFlowOutputParameters: [] +- dockerImage: "airbyte/source-linkedin-ads:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/linkedin-ads" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Linkedin Ads Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + title: "Start Date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + description: "Date in the format 2020-09-17. Any data before this date will\ + \ not be replicated." + examples: + - "2021-05-17" + access_token: + type: "string" + title: "Access Token" + description: "The token value ganerated using Auth Code" + airbyte_secret: true + account_ids: + title: "Account IDs" + type: "array" + description: "Specify the Account IDs separated by space, from which to\ + \ pull the data. Leave empty to pull from all associated accounts." + items: + type: "integer" + default: [] + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-looker:0.2.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/looker" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Looker Spec" + type: "object" + required: + - "domain" + - "client_id" + - "client_secret" + additionalProperties: false + properties: + domain: + type: "string" + examples: + - "domainname.looker.com" + - "looker.clientname.com" + - "123.123.124.123:8000" + description: "Domain for your Looker account, e.g. airbyte.cloud.looker.com,looker.[clientname].com,IP\ + \ address" + client_id: + title: "Client ID" + type: "string" + description: "The Client ID is first part of an API3 key that is specific\ + \ to each Looker user. See the docs for more information on how to generate this key." + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret is second part of an API3 key." + run_look_ids: + title: "Look IDs to Run" + type: "array" + items: + type: "string" + pattern: "^[0-9]*$" + description: "The IDs of any Looks to run (optional)" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mailchimp:0.2.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mailchimp" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Mailchimp Spec" + type: "object" + required: + - "username" + - "apikey" + additionalProperties: false + properties: + username: + type: "string" + description: "The Username or email you use to sign into Mailchimp" + apikey: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." 
+ supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-marketo:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/marketo" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Marketo Spec" + type: "object" + required: + - "domain_url" + - "client_id" + - "client_secret" + - "start_date" + additionalProperties: false + properties: + domain_url: + type: "string" + description: "Your Marketo Base URL. See the docs for info on how to obtain this." + examples: + - "https://000-AAA-000.mktorest.com" + airbyte_secret: true + client_id: + type: "string" + description: "Your Marketo client_id. See the docs for info on how to obtain this." + airbyte_secret: true + client_secret: + type: "string" + description: "Your Marketo client secret. See the docs for info on how to obtain this." + airbyte_secret: true + start_date: + type: "string" + description: "Data generated in Marketo after this date will be replicated.\ + \ This date must be specified in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2020-09-25T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + window_in_days: + type: "integer" + description: "The amount of days for each data-chunk begining from start_date.\ + \ (Min=1, as for a Day; Max=30, as for a Month)." + examples: + - 1 + - 5 + - 10 + - 15 + - 30 + default: 30 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mssql:0.3.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/mssql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MSSQL Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + description: "Hostname of the database." + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + examples: + - "1433" + database: + description: "Name of the database." + type: "string" + examples: + - "master" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + ssl_method: + title: "SSL Method" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "ssl_method" + properties: + ssl_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Encrypted (trust server certificate)" + additionalProperties: false + description: "Use the cert provided by the server without verification.\ + \ (For testing purposes only!)" + required: + - "ssl_method" + properties: + ssl_method: + type: "string" + const: "encrypted_trust_server_certificate" + enum: + - "encrypted_trust_server_certificate" + default: "encrypted_trust_server_certificate" + - title: "Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." 
+ required: + - "ssl_method" + - "trustStoreName" + - "trustStorePassword" + properties: + ssl_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + hostNameInCertificate: + title: "Host Name In Certificate" + type: "string" + description: "Specifies the host name of the server. The value of\ + \ this property must match the subject property of the certificate." + order: 7 + replication_method: + type: "string" + title: "Replication Method" + description: "Replication method to use for extracting data from the database.\ + \ STANDARD replication requires no setup on the DB side but will not be\ + \ able to represent deletions incrementally. CDC uses {TBC} to detect\ + \ inserts, updates, and deletes. This needs to be configured on the source\ + \ database itself." + default: "STANDARD" + enum: + - "STANDARD" + - "CDC" + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials for logging\ + \ into the jump server host." + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-microsoft-teams:0.2.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/microsoft-teams" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Microsoft Teams Spec" + type: "object" + required: + - "tenant_id" + - "client_id" + - "client_secret" + - "period" + additionalProperties: false + properties: + tenant_id: + title: "Directory (tenant) ID" + type: "string" + description: "Directory (tenant) ID" + client_id: + title: "Application (client) ID" + type: "string" + description: "Application (client) ID" + client_secret: + title: "Client Secret" + type: "string" + description: "Client secret" + airbyte_secret: true + period: + type: "string" + description: "Specifies the length of time over which the Team Device Report\ + \ stream is aggregated. The supported values are: D7, D30, D90, and D180." + examples: + - "D7" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mixpanel:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mixpanel" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Mixpanel Spec" + type: "object" + required: + - "api_secret" + additionalProperties: true + properties: + api_secret: + type: "string" + description: "Mixpanel API Secret. See the docs for more information on how to obtain this key." + airbyte_secret: true + attribution_window: + type: "integer" + description: "Latency minimum number of days to look-back to account for\ + \ delays in attributing accurate results. Default attribution window is\ + \ 5 days." + default: 5 + date_window_size: + type: "integer" + description: "Number of days for date window looping through transactional\ + \ endpoints with from_date and to_date. Default date_window_size is 30\ + \ days. Clients with large volumes of events may want to decrease this\ + \ to 14, 7, or even down to 1-2 days." + default: 30 + project_timezone: + type: "string" + description: "Time zone in which integer date times are stored. The project\ + \ timezone may be found in the project settings in the Mixpanel console." + default: "US/Pacific" + examples: + - "US/Pacific" + - "UTC" + select_properties_by_default: + type: "boolean" + description: "Setting this config parameter to true ensures that new properties\ + \ on events and engage records are captured. Otherwise new properties\ + \ will be ignored" + default: true + start_date: + type: "string" + description: "The default value to use if no bookmark exists for an endpoint.\ + \ Default is 1 year ago." 
+ examples: + - "2021-11-16" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)?$" + region: + type: "string" + enum: + - "US" + - "EU" + default: "US" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mongodb-v2:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" + changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MongoDb Source Spec" + type: "object" + required: + - "database" + additionalProperties: true + properties: + instance_type: + type: "object" + title: "MongoDb instance type" + description: "MongoDb instance to connect to. For MongoDB Atlas and Replica\ + \ Set TLS connection is used by default." + order: 0 + oneOf: + - title: "Standalone MongoDb Instance" + required: + - "instance" + - "host" + - "port" + properties: + instance: + type: "string" + enum: + - "standalone" + default: "standalone" + host: + title: "Host" + type: "string" + description: "Host of a Mongo database to be replicated." + order: 0 + port: + title: "Port" + type: "integer" + description: "Port of a Mongo database to be replicated." + minimum: 0 + maximum: 65536 + default: 27017 + examples: + - "27017" + order: 1 + tls: + title: "TLS connection" + type: "boolean" + description: "Indicates whether TLS encryption protocol will be used\ + \ to connect to MongoDB. It is recommended to use TLS connection\ + \ if possible. For more information see documentation." + default: false + order: 2 + - title: "Replica Set" + required: + - "instance" + - "server_addresses" + properties: + instance: + type: "string" + enum: + - "replica" + default: "replica" + server_addresses: + title: "Server addresses" + type: "string" + description: "The members of a replica set. Please specify `host`:`port`\ + \ of each member seperated by comma." + examples: + - "host1:27017,host2:27017,host3:27017" + order: 0 + replica_set: + title: "Replica Set" + type: "string" + description: "A replica set name." + order: 1 + - title: "MongoDB Atlas" + additionalProperties: false + required: + - "instance" + - "cluster_url" + properties: + instance: + type: "string" + enum: + - "atlas" + default: "atlas" + cluster_url: + title: "Cluster URL" + type: "string" + description: "URL of a cluster to connect to." + order: 0 + database: + title: "Database name" + type: "string" + description: "Database to be replicated." + order: 1 + user: + title: "User" + type: "string" + description: "User" + order: 2 + password: + title: "Password" + type: "string" + description: "Password" + airbyte_secret: true + order: 3 + auth_source: + title: "Authentication source" + type: "string" + description: "Authentication source where user information is stored" + default: "admin" + examples: + - "admin" + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-mysql:0.4.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/mysql" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "MySql Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "replication_method" + additionalProperties: false + properties: + host: + description: "Hostname of the database." + type: "string" + order: 0 + port: + description: "Port of the database." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 3306 + examples: + - "3306" + order: 1 + database: + description: "Name of the database." + type: "string" + order: 2 + username: + description: "Username to use to access the database." + type: "string" + order: 3 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + jdbc_url_params: + description: "Additional properties to pass to the jdbc url string when\ + \ connecting to the database formatted as 'key=value' pairs separated\ + \ by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)" + type: "string" + order: 5 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: true + order: 7 + replication_method: + type: "string" + title: "Replication Method" + description: "Replication method to use for extracting data from the database.\ + \ STANDARD replication requires no setup on the DB side but will not be\ + \ able to represent deletions incrementally. CDC uses the Binlog to detect\ + \ inserts, updates, and deletes. This needs to be configured on the source\ + \ database itself." + order: 6 + default: "STANDARD" + enum: + - "STANDARD" + - "CDC" + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-okta:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/okta" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Okta Spec" + type: "object" + required: + - "token" + - "base_url" + additionalProperties: false + properties: + token: + type: "string" + title: "API Token" + description: "A Okta token. See the docs for instructions on how to generate it." + airbyte_secret: true + base_url: + type: "string" + title: "Base URL" + description: "The Okta base URL." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-onesignal:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/onesignal" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "OneSignal Source Spec" + type: "object" + required: + - "user_auth_key" + - "start_date" + - "outcome_names" + additionalProperties: false + properties: + user_auth_key: + type: "string" + description: "OneSignal User Auth Key, see the docs for more information on how to obtain this key." + airbyte_secret: true + start_date: + type: "string" + description: "The date from which you'd like to replicate data for OneSignal\ + \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\ + \ date will be replicated." + examples: + - "2020-11-16T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + outcome_names: + type: "string" + description: "Comma-separated list of names and the value (sum/count) for\ + \ the returned outcome data. See the docs for more details" + examples: + - "os__session_duration.count,os__click.count,CustomOutcomeName.sum" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-oracle:0.3.8" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/oracle" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Oracle Source Spec" + type: "object" + required: + - "host" + - "port" + - "sid" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + port: + title: "Port" + description: "Port of the database.\nOracle Corporations recommends the\ + \ following port numbers:\n1521 - Default listening port for client connections\ + \ to the listener. \n2484 - Recommended and officially registered listening\ + \ port for client connections to the listener using TCP/IP with SSL" + type: "integer" + minimum: 0 + maximum: 65536 + default: 1521 + sid: + title: "SID (Oracle System Identifier)" + type: "string" + username: + title: "User" + description: "Username to use to access the database." + type: "string" + password: + title: "Password" + description: "Password associated with the username." 
+ type: "string" + airbyte_secret: true + schemas: + title: "Schemas" + description: "List of schemas to sync from. Defaults to user. Case sensitive." + type: "array" + items: + type: "string" + minItems: 1 + uniqueItems: true + encryption: + title: "Encryption" + type: "object" + description: "Encryption method to use when communicating with the database" + order: 6 + oneOf: + - title: "Unencrypted" + additionalProperties: false + description: "Data transfer will not be encrypted." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "unencrypted" + enum: + - "unencrypted" + default: "unencrypted" + - title: "Native Network Ecryption (NNE)" + additionalProperties: false + description: "Native network encryption gives you the ability to encrypt\ + \ database connections, without the configuration overhead of TCP/IP\ + \ and SSL/TLS and without the need to open and listen on different ports." + required: + - "encryption_method" + properties: + encryption_method: + type: "string" + const: "client_nne" + enum: + - "client_nne" + default: "client_nne" + encryption_algorithm: + type: "string" + description: "This parameter defines the encryption algorithm to be\ + \ used" + title: "Encryption Algorithm" + default: "AES256" + enum: + - "AES256" + - "RC4_56" + - "3DES168" + - title: "TLS Encrypted (verify certificate)" + additionalProperties: false + description: "Verify and use the cert provided by the server." + required: + - "encryption_method" + - "ssl_certificate" + properties: + encryption_method: + type: "string" + const: "encrypted_verify_certificate" + enum: + - "encrypted_verify_certificate" + default: "encrypted_verify_certificate" + ssl_certificate: + title: "SSL PEM file" + description: "Privacy Enhanced Mail (PEM) files are concatenated certificate\ + \ containers frequently used in certificate installations" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." 
+ type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-paypal-transaction:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/paypal-transactions" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Paypal Transaction Search" + type: "object" + required: + - "client_id" + - "secret" + - "start_date" + - "is_sandbox" + additionalProperties: true + properties: + client_id: + title: "Client ID" + type: "string" + description: "The Paypal Client ID for API credentials" + secret: + title: "Secret" + type: "string" + description: "The Secret for a given Client ID." + airbyte_secret: true + start_date: + type: "string" + title: "Start Date" + description: "Start Date for data extraction in ISO format. Date must be in range from 3 years till 12 hrs before\ + \ present time" + examples: + - "2021-06-11T23:59:59-00:00" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}$" + is_sandbox: + title: "Is Sandbox" + description: "Whether or not to Sandbox or Production environment to extract\ + \ data from" + type: "boolean" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-paystack:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/paystack" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Paystack Source Spec" + type: "object" + required: + - "secret_key" + - "start_date" + additionalProperties: false + properties: + secret_key: + type: "string" + pattern: "^(s|r)k_(live|test)_[a-zA-Z0-9]+$" + description: "Paystack API key (usually starts with 'sk_live_'; find yours\ + \ here)." + airbyte_secret: true + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." 
+ examples: + - "2017-01-25T00:00:00Z" + lookback_window_days: + type: "integer" + title: "Lookback Window (in days)" + default: 0 + minimum: 0 + description: "When set, the connector will always reload data from the past\ + \ N days, where N is the value set here. This is useful if your data is\ + \ updated after creation." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pipedrive:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pipedrive" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pipedrive Spec" + type: "object" + required: + - "replication_start_date" + additionalProperties: true + properties: + authorization: + type: "object" + title: "Authentication Type" + oneOf: + - title: "Sign in via Pipedrive (OAuth)" + type: "object" + required: + - "auth_type" + - "client_id" + - "client_secret" + - "refresh_token" + properties: + auth_type: + type: "string" + const: "Client" + enum: + - "Client" + default: "Client" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The Client ID of your developer application" + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The client secret of your developer application" + airbyte_secret: true + access_token: + title: "Access Token" + type: "string" + description: "An access token generated using the above client ID\ + \ and secret" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "A refresh token generated using the above client ID\ + \ and secret" + airbyte_secret: true + - type: "object" + title: "API Key Authentication" + required: + - "auth_type" + - "api_token" + properties: + auth_type: + type: "string" + const: "Token" + enum: + - "Token" + default: "Token" + order: 0 + api_token: + title: "API Token" + type: "string" + description: "Pipedrive API Token" + airbyte_secret: true + replication_start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated. When specified and not\ + \ None, then stream will behave as incremental" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2017-01-25T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-plaid:0.2.1" + spec: + documentationUrl: "https://plaid.com/docs/api/" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + type: "object" + required: + - "access_token" + - "api_key" + - "client_id" + additionalProperties: false + properties: + access_token: + type: "string" + title: "Access Token" + description: "The end-user's Link access token." + api_key: + title: "API Key" + type: "string" + description: "The Plaid API key to use to hit the API." 
+ airbyte_secret: true + client_id: + title: "Client ID" + type: "string" + description: "The Plaid client id" + plaid_env: + title: "Plaid Environment" + type: "string" + enum: + - "sandbox" + - "development" + - "production" + description: "The Plaid environment" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pokeapi:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pokeapi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pokeapi Spec" + type: "object" + required: + - "pokemon_name" + additionalProperties: false + properties: + pokemon_name: + type: "string" + description: "Pokemon requested from the API." + pattern: "^[a-z0-9_\\-]+$" + examples: + - "ditto, luxray, snorlax" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-posthog:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/posthog" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "PostHog Spec" + type: "object" + required: + - "api_key" + - "start_date" + additionalProperties: false + properties: + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-01-01T00:00:00Z" + api_key: + type: "string" + airbyte_secret: true + description: "API Key. See the docs for information on how to generate this key." + base_url: + type: "string" + default: "https://app.posthog.com" + description: "Base PostHog url. Defaults to PostHog Cloud (https://app.posthog.com)." + examples: + - "https://posthog.example.com" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-postgres:0.3.13" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/postgres" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Postgres Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: false + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5432 + examples: + - "5432" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "Connect using SSL" + description: "Encrypt client/server communications for increased security." + type: "boolean" + default: false + order: 5 + replication_method: + type: "object" + title: "Replication Method" + description: "Replication method to use for extracting data from the database." + order: 6 + oneOf: + - title: "Standard" + additionalProperties: false + description: "Standard replication requires no setup on the DB side but\ + \ will not be able to represent deletions incrementally." 
+ required: + - "method" + properties: + method: + type: "string" + const: "Standard" + enum: + - "Standard" + default: "Standard" + order: 0 + - title: "Logical Replication (CDC)" + additionalProperties: false + description: "Logical replication uses the Postgres write-ahead log (WAL)\ + \ to detect inserts, updates, and deletes. This needs to be configured\ + \ on the source database itself. Only available on Postgres 10 and above.\ + \ Read the Postgres Source docs for more information." + required: + - "method" + - "replication_slot" + - "publication" + properties: + method: + type: "string" + const: "CDC" + enum: + - "CDC" + default: "CDC" + order: 0 + plugin: + type: "string" + description: "A logical decoding plug-in installed on the PostgreSQL\ + \ server. `pgoutput` plug-in is used by default.\nIf replication\ + \ table contains a lot of big jsonb values it is recommended to\ + \ use `wal2json` plug-in. For more information about `wal2json`\ + \ plug-in read Postgres Source docs." + enum: + - "pgoutput" + - "wal2json" + default: "pgoutput" + order: 1 + replication_slot: + type: "string" + description: "A plug-in logical replication slot." + order: 2 + publication: + type: "string" + description: "A Postgres publication used for consuming changes." + order: 3 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." + oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-prestashop:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "PrestaShop Spec" + type: "object" + required: + - "url" + - "access_key" + additionalProperties: false + properties: + url: + type: "string" + description: "Shop URL without trailing slash (domain name or IP address)" + access_key: + type: "string" + description: "Your PrestaShop access key. See the docs for info on how to obtain this." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-quickbooks-singer:0.1.3" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Quickbooks Singer Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "realm_id" + - "user_agent" + - "start_date" + - "sandbox" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Identifies which app is making the request. Obtain this value\ + \ from the Keys tab on the app profile via My Apps on the developer site.\ + \ There are two versions of this key: development and production" + client_secret: + description: " Obtain this value from the Keys tab on the app profile via\ + \ My Apps on the developer site. There are two versions of this key: development\ + \ and production" + type: "string" + airbyte_secret: true + refresh_token: + description: "A token used when refreshing the access token." + type: "string" + airbyte_secret: true + realm_id: + description: "Labeled Company ID. The Make API Calls panel is populated\ + \ with the realm id and the current access token" + type: "string" + airbyte_secret: true + user_agent: + type: "string" + description: "Process and email for API logging purposes. Example: tap-quickbooks\ + \ " + start_date: + description: "The default value to use if no bookmark exists for an endpoint\ + \ (rfc3339 date string) E.g, 2021-03-20T00:00:00Z" + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2021-03-20T00:00:00Z" + sandbox: + description: "Development or Production." + type: "boolean" + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-recharge:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/recharge" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Recharge Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Recharge\ + \ API, in the format YYYY-MM-DDT00:00:00Z." 
+ examples: + - "2021-05-14T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + access_token: + type: "string" + description: "The value of the Access Token generated. See the docs for more\ + \ information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-recurly:0.2.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/recurly" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Recurly Source Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Recurly API Key. See the docs for more information on how to generate this key." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-redshift:0.3.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Redshift Source Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host Endpoint of the Redshift Cluster (must include the cluster-id,\ + \ region and end with .redshift.amazonaws.com)" + type: "string" + port: + description: "Port of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 5439 + examples: + - "5439" + database: + description: "Name of the database." + type: "string" + examples: + - "master" + username: + description: "Username to use to access the database." + type: "string" + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-retently:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Retently Api Spec" + type: "object" + required: + - "api_key" + additionalProperties: false + properties: + api_key: + type: "string" + description: "API key from https://app.retently.com/settings/api/tokens" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-s3:0.1.6" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" + changelogUrl: "https://docs.airbyte.io/integrations/sources/s3" + connectionSpecification: + title: "S3 Source Spec" + type: "object" + properties: + dataset: + title: "Dataset" + description: "This source creates one table per connection, this field is\ + \ the name of that table. This should include only letters, numbers, dash\ + \ and underscores. Note that this may be altered according to destination." + pattern: "^([A-Za-z0-9-_]+)$" + type: "string" + path_pattern: + title: "Path Pattern" + description: "Add at least 1 pattern here to match filepaths against. Use\ + \ | to separate multiple patterns. Airbyte uses these patterns to determine\ + \ which files to pick up from the provider storage. See wcmatch.glob to understand pattern syntax (GLOBSTAR\ + \ and SPLIT flags are enabled). Use pattern ** to pick\ + \ up all files." 
+ examples: + - "**" + - "myFolder/myTableFiles/*.csv|myFolder/myOtherTableFiles/*.csv" + type: "string" + schema: + title: "Schema" + description: "Optionally provide a schema to enforce, as a valid JSON string.\ + \ Ensure this is a mapping of { \"column\" : \"type\" },\ + \ where types are valid JSON Schema datatypes. Leave as {} to auto-infer\ + \ the schema." + default: "{}" + examples: + - "{\"column_1\": \"number\", \"column_2\": \"string\", \"column_3\": \"\ + array\", \"column_4\": \"object\", \"column_5\": \"boolean\"}" + type: "string" + format: + title: "Format" + default: "csv" + type: "object" + oneOf: + - title: "csv" + description: "This connector utilises PyArrow (Apache Arrow) for CSV parsing." + type: "object" + properties: + filetype: + title: "Filetype" + const: "csv" + type: "string" + delimiter: + title: "Delimiter" + description: "The character delimiting individual cells in the CSV\ + \ data. This may only be a 1-character string." + default: "," + minLength: 1 + type: "string" + quote_char: + title: "Quote Char" + description: "The character used optionally for quoting CSV values.\ + \ To disallow quoting, make this field blank." + default: "\"" + type: "string" + escape_char: + title: "Escape Char" + description: "The character used optionally for escaping special characters.\ + \ To disallow escaping, leave this field blank." + type: "string" + encoding: + title: "Encoding" + description: "The character encoding of the CSV data. Leave blank\ + \ to default to UTF-8. See list of python encodings for allowable options." + type: "string" + double_quote: + title: "Double Quote" + description: "Whether two quotes in a quoted CSV value denote a single\ + \ quote in the data." + default: true + type: "boolean" + newlines_in_values: + title: "Newlines In Values" + description: "Whether newline characters are allowed in CSV values.\ + \ Turning this on may affect performance. Leave blank to default\ + \ to False." + default: false + type: "boolean" + block_size: + title: "Block Size" + description: "The chunk size in bytes to process at a time in memory\ + \ from each file. If your data is particularly wide and failing\ + \ during schema detection, increasing this should solve it. Beware\ + \ of raising this too high as you could hit OOM errors." + default: 10000 + type: "integer" + additional_reader_options: + title: "Additional Reader Options" + description: "Optionally add a valid JSON string here to provide additional\ + \ options to the csv reader. Mappings must correspond to options\ + \ detailed here. 'column_types' is used internally\ + \ to handle schema so overriding that would likely cause problems." + default: "{}" + examples: + - "{\"timestamp_parsers\": [\"%m/%d/%Y %H:%M\", \"%Y/%m/%d %H:%M\"\ + ], \"strings_can_be_null\": true, \"null_values\": [\"NA\", \"NULL\"\ + ]}" + type: "string" + advanced_options: + title: "Advanced Options" + description: "Optionally add a valid JSON string here to provide additional\ + \ Pyarrow ReadOptions. Specify 'column_names'\ + \ here if your CSV doesn't have header, or if you want to use custom\ + \ column names. 'block_size' and 'encoding' are already used above,\ + \ specify them again here will override the values above." + default: "{}" + examples: + - "{\"column_names\": [\"column1\", \"column2\"]}" + type: "string" + - title: "parquet" + description: "This connector utilises PyArrow (Apache Arrow) for Parquet parsing." 
+ type: "object" + properties: + filetype: + title: "Filetype" + const: "parquet" + type: "string" + buffer_size: + title: "Buffer Size" + description: "Perform read buffering when deserializing individual\ + \ column chunks. By default every group column will be loaded fully\ + \ to memory. This option can help to optimize a work with memory\ + \ if your data is particularly wide or failing during detection\ + \ of OOM errors." + default: 0 + type: "integer" + columns: + title: "Columns" + description: "If you only want to sync a subset of the columns from\ + \ the file(s), add the columns you want here. Leave it empty to\ + \ sync all columns." + type: "array" + items: + type: "string" + batch_size: + title: "Batch Size" + description: "Maximum number of records per batch. Batches may be\ + \ smaller if there aren’t enough rows in the file. This option can\ + \ help to optimize a work with memory if your data is particularly\ + \ wide or failing during detection of OOM errors." + default: 65536 + type: "integer" + provider: + title: "S3: Amazon Web Services" + type: "object" + properties: + bucket: + title: "Bucket" + description: "Name of the S3 bucket where the file(s) exist." + type: "string" + aws_access_key_id: + title: "Aws Access Key Id" + description: "In order to access private Buckets stored on AWS S3, this\ + \ connector requires credentials with the proper permissions. If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + type: "string" + aws_secret_access_key: + title: "Aws Secret Access Key" + description: "In order to access private Buckets stored on AWS S3, this\ + \ connector requires credentials with the proper permissions. If accessing\ + \ publicly available data, this field is not necessary." + airbyte_secret: true + type: "string" + path_prefix: + title: "Path Prefix" + description: "By providing a path-like prefix (e.g. myFolder/thisTable/)\ + \ under which all the relevant files sit, we can optimise finding\ + \ these in S3. This is optional but recommended if your bucket contains\ + \ many folders/files." + default: "" + type: "string" + endpoint: + title: "Endpoint" + description: "Endpoint to an S3 compatible service. Leave empty to use\ + \ AWS." + default: "" + type: "string" + use_ssl: + title: "Use Ssl" + description: "Is remote server using secure SSL/TLS connection" + type: "boolean" + verify_ssl_cert: + title: "Verify Ssl Cert" + description: "Allow self signed certificates" + type: "boolean" + required: + - "bucket" + required: + - "dataset" + - "path_pattern" + - "provider" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-salesloft:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/salesloft" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Salesloft Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "start_date" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Salesloft client id." + client_secret: + type: "string" + description: "Salesloft client secret." + airbyte_secret: true + refresh_token: + type: "string" + description: "Salesloft refresh token." 
+          airbyte_secret: true
+        start_date:
+          type: "string"
+          description: "The date from which you'd like to replicate data for Salesloft\
+            \ API, in the format YYYY-MM-DDT00:00:00Z. All data generated after this\
+            \ date will be replicated."
+          examples:
+          - "2020-11-16T00:00:00Z"
+          pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$"
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-salesforce:0.1.2"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "Salesforce Source Spec"
+      type: "object"
+      required:
+      - "client_id"
+      - "client_secret"
+      - "refresh_token"
+      - "start_date"
+      - "api_type"
+      additionalProperties: false
+      properties:
+        client_id:
+          description: "The Consumer Key that can be found when viewing your app in\
+            \ Salesforce"
+          type: "string"
+        client_secret:
+          description: "The Consumer Secret that can be found when viewing your app\
+            \ in Salesforce"
+          type: "string"
+          airbyte_secret: true
+        refresh_token:
+          description: "Salesforce Refresh Token used for Airbyte to access your Salesforce\
+            \ account. If you don't know what this is, follow this guide to retrieve it."
+          type: "string"
+          airbyte_secret: true
+        start_date:
+          description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\
+            \ data before this date will not be replicated."
+          type: "string"
+          pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$"
+          examples:
+          - "2021-07-25T00:00:00Z"
+        is_sandbox:
+          description: "Whether or not the app is in a Salesforce sandbox. If you\
+            \ do not know what this is, assume it is false. We provide more info\
+            \ on this field in the docs."
+          type: "boolean"
+          default: false
+        api_type:
+          description: "Unless you know that you are transferring a very small amount\
+            \ of data, prefer using the BULK API. This will help avoid using up all\
+            \ of your API call quota with Salesforce. Valid values are BULK or REST."
+          type: "string"
+          enum:
+          - "BULK"
+          - "REST"
+          default: "BULK"
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes: []
+    authSpecification:
+      auth_type: "oauth2.0"
+      oauth2Specification:
+        rootObject: []
+        oauthFlowInitParameters:
+        - - "client_id"
+        - - "client_secret"
+        oauthFlowOutputParameters:
+        - - "refresh_token"
+- dockerImage: "airbyte/source-sendgrid:0.2.6"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/sources/sendgrid"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "Sendgrid Spec"
+      type: "object"
+      required:
+      - "apikey"
+      additionalProperties: false
+      properties:
+        apikey:
+          type: "string"
+          description: "API Key, use admin to generate this key."
+        start_time:
+          type: "integer"
+          description: "Start time in timestamp integer format. Any data before this\
+            \ timestamp will not be replicated."
+ examples: + - 1558359837 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-shopify:0.1.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Shopify Source CDK Specifications" + type: "object" + required: + - "shop" + - "start_date" + - "auth_method" + additionalProperties: false + properties: + shop: + type: "string" + description: "The name of the shopify store. For https://EXAMPLE.myshopify.com,\ + \ the shop name is 'EXAMPLE'." + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DD." + examples: + - "2021-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + auth_method: + title: "Shopify Authorization Method" + type: "object" + oneOf: + - type: "object" + title: "OAuth2.0" + required: + - "client_id" + - "client_secret" + - "access_token" + properties: + auth_method: + type: "string" + const: "access_token" + enum: + - "access_token" + default: "access_token" + order: 0 + client_id: + type: "string" + description: "The API Key of the Shopify developer application." + airbyte_secret: true + client_secret: + type: "string" + description: "The API Secret the Shopify developer application." + airbyte_secret: true + access_token: + type: "string" + description: "Access Token for making authenticated requests." + airbyte_secret: true + - title: "API Password" + type: "object" + required: + - "api_password" + properties: + auth_method: + type: "string" + const: "api_password" + enum: + - "api_password" + default: "api_password" + order: 0 + api_password: + type: "string" + description: "The API PASSWORD for your private application in `Shopify`\ + \ shop." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "auth_method" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-shortio:0.1.0" + spec: + documentationUrl: "https://developers.short.io/reference" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Shortio Spec" + type: "object" + required: + - "domain_id" + - "secret_key" + - "start_date" + additionalProperties: false + properties: + domain_id: + type: "string" + description: "Domain ID" + airbyte_secret: false + secret_key: + type: "string" + description: "Short.io Secret key" + airbyte_secret: true + start_date: + type: "string" + description: "Start Date, YYYY-MM-DD" + airbyte_secret: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-slack:0.1.12" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/slack" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Slack Spec" + type: "object" + required: + - "start_date" + - "lookback_window" + - "join_channels" + additionalProperties: true + properties: + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." 
+ examples: + - "2017-01-25T00:00:00Z" + title: "Start Date" + lookback_window: + type: "integer" + title: "Threads Lookback window (Days)" + description: "How far into the past to look for messages in threads." + examples: + - 7 + - 14 + join_channels: + type: "boolean" + default: true + title: "Join all channels" + description: "Whether to join all channels or to sync data only from channels\ + \ the bot is already in. If false, you'll need to manually add the bot\ + \ to all the channels from which you'd like to sync messages. " + credentials: + title: "Authentication mechanism" + description: "Choose how to authenticate into Slack" + type: "object" + oneOf: + - type: "object" + title: "Sign in via Slack (OAuth)" + required: + - "access_token" + - "client_id" + - "client_secret" + - "option_title" + properties: + option_title: + type: "string" + const: "Default OAuth2.0 authorization" + client_id: + title: "Client ID" + description: "Slack client_id. See our docs if you need help finding this id." + type: "string" + examples: + - "slack-client-id-example" + client_secret: + title: "Client Secret" + description: "Slack client_secret. See our docs if you need help finding this secret." + type: "string" + examples: + - "slack-client-secret-example" + airbyte_secret: true + access_token: + title: "Access token" + description: "Slack access_token. See our docs if you need help generating the token." + type: "string" + examples: + - "slack-access-token-example" + airbyte_secret: true + refresh_token: + title: "Refresh token" + description: "Slack refresh_token. See our docs if you need help generating the token." + type: "string" + examples: + - "slack-refresh-token-example" + airbyte_secret: true + order: 0 + - type: "object" + title: "API Token" + required: + - "api_token" + - "option_title" + properties: + option_title: + type: "string" + const: "API Token Credentials" + api_token: + type: "string" + title: "API Token" + description: "A Slack bot token. See the docs for instructions on how to generate it." 
+ airbyte_secret: true + order: 1 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "access_token" + - - "refresh_token" +- dockerImage: "airbyte/source-smartsheets:0.1.5" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/smartsheets" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Smartsheets Source Spec" + type: "object" + required: + - "access_token" + - "spreadsheet_id" + additionalProperties: false + properties: + access_token: + title: "API Access token" + description: "Found in Profile > Apps & Integrations > API Access within\ + \ Smartsheet app" + type: "string" + airbyte_secret: true + spreadsheet_id: + title: "Smartsheet ID" + description: "Found in File > Properties" + type: "string" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-snapchat-marketing:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/snapchat-marketing" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snapchat Marketing Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + additionalProperties: false + properties: + client_id: + title: "Client ID" + type: "string" + description: "The Snapchat Client ID for API credentials." + airbyte_secret: true + client_secret: + title: "Client Secret" + type: "string" + description: "The Client Secret for a given Client ID." + airbyte_secret: true + refresh_token: + title: "API Refresh Token" + type: "string" + description: "Refresh Token to get next api key after expiration. Is given\ + \ with API Key" + airbyte_secret: true + start_date: + title: "Start Date" + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-snowflake:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/snowflake" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Snowflake Source Spec" + type: "object" + required: + - "host" + - "role" + - "warehouse" + - "database" + - "schema" + - "username" + - "password" + additionalProperties: false + properties: + host: + description: "Host domain of the snowflake instance (must include the account,\ + \ region, cloud environment, and end with snowflakecomputing.com)." + examples: + - "accountname.us-east-2.aws.snowflakecomputing.com" + type: "string" + title: "Account name" + order: 0 + role: + description: "The role you created for Airbyte to access Snowflake." + examples: + - "AIRBYTE_ROLE" + type: "string" + title: "Role" + order: 1 + warehouse: + description: "The warehouse you created for Airbyte to access data into." + examples: + - "AIRBYTE_WAREHOUSE" + type: "string" + title: "Warehouse" + order: 2 + database: + description: "The database you created for Airbyte to access data into." 
+ examples: + - "AIRBYTE_DATABASE" + type: "string" + title: "Database" + order: 3 + schema: + description: "The source Snowflake schema tables." + examples: + - "AIRBYTE_SCHEMA" + type: "string" + title: "Schema" + order: 4 + username: + description: "The username you created to allow Airbyte to access the database." + examples: + - "AIRBYTE_USER" + type: "string" + title: "Username" + order: 5 + password: + description: "Password associated with the username." + type: "string" + airbyte_secret: true + title: "Password" + order: 6 + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-square:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/square" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Square Source CDK Specifications" + type: "object" + required: + - "api_key" + - "is_sandbox" + additionalProperties: false + properties: + api_key: + type: "string" + description: "The API key for a Square application" + airbyte_secret: true + is_sandbox: + type: "boolean" + description: "Determines the sandbox (true) or production (false) API version" + examples: + - true + - false + default: true + start_date: + type: "string" + description: "The start date to sync data. Leave blank for full sync. Format:\ + \ YYYY-MM-DD." + examples: + - "2021-01-01" + default: "1970-01-01" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + include_deleted_objects: + type: "boolean" + description: "In some streams there is and option to include deleted objects\ + \ (Items, Categories, Discounts, Taxes)" + examples: + - true + - false + default: false + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-strava:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Strava Spec" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + - "athlete_id" + - "start_date" + additionalProperties: false + properties: + client_id: + type: "string" + description: "Strava Client ID" + pattern: "^[0-9_\\-]+$" + examples: + - "12345" + client_secret: + type: "string" + description: "Strava Client Secret" + pattern: "^[0-9a-fA-F]+$" + examples: + - "fc6243f283e51f6ca989aab298b17da125496f50" + airbyte_secret: true + refresh_token: + type: "string" + description: "Strava Refresh Token with activity:read_all permissions" + pattern: "^[0-9a-fA-F]+$" + examples: + - "fc6243f283e51f6ca989aab298b17da125496f50" + airbyte_secret: true + athlete_id: + type: "integer" + description: "Strava Athlete ID" + pattern: "^[0-9_\\-]+$" + examples: + - "17831421" + start_date: + type: "string" + description: "Start Query Timestamp in UTC" + examples: + - "2016-12-31 23:59:59" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-stripe:0.1.21" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Stripe Source Spec" + type: "object" + required: + - "client_secret" + - "account_id" + - "start_date" + additionalProperties: false + properties: + client_secret: + type: "string" + pattern: "^(s|r)k_(live|test)_[a-zA-Z0-9]+$" + description: "Stripe API key (usually starts with 'sk_live_'; find yours\ + \ here)." 
+ airbyte_secret: true + account_id: + type: "string" + description: "Your Stripe account ID (starts with 'acct_', find yours here)." + start_date: + type: "string" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2017-01-25T00:00:00Z" + lookback_window_days: + type: "integer" + title: "Lookback Window (in days)" + default: 0 + minimum: 0 + description: "When set, the connector will always reload data from the past\ + \ N days, where N is the value set here. This is useful if your data is\ + \ updated after creation." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-surveymonkey:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/surveymonkey" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "SurveyMonkey Spec" + type: "object" + required: + - "start_date" + additionalProperties: true + properties: + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z?$" + examples: + - "2021-01-01T00:00:00Z" + access_token: + title: "Access Token" + type: "string" + airbyte_secret: true + description: "API Token. See the docs for information on how to generate this key." + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "access_token" +- dockerImage: "airbyte/source-tempo:0.2.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Tempo Spec" + type: "object" + required: + - "api_token" + additionalProperties: false + properties: + api_token: + type: "string" + description: "Tempo API Token." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-tiktok-marketing:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" + changelogUrl: "https://docs.airbyte.io/integrations/sources/tiktok-marketing" + connectionSpecification: + title: "TikTok Marketing Source Spec" + type: "object" + properties: + environment: + title: "Environment" + default: "Production" + oneOf: + - title: "Production" + type: "object" + properties: + environment: + title: "Environment" + const: "prod" + type: "string" + app_id: + title: "App Id" + description: "The App id applied by the developer." + type: "string" + secret: + title: "Secret" + description: "The private key of the developer's application." + airbyte_secret: true + type: "string" + required: + - "app_id" + - "secret" + - title: "Sandbox" + type: "object" + properties: + environment: + title: "Environment" + const: "sandbox" + type: "string" + advertiser_id: + title: "Advertiser Id" + description: "The Advertiser ID which generated for the developer's\ + \ Sandbox application." + type: "string" + required: + - "advertiser_id" + type: "object" + access_token: + title: "Access Token" + description: "Long-term Authorized Access Token." 
+ airbyte_secret: true + type: "string" + start_date: + title: "Start Date" + description: "Start Date in format: YYYY-MM-DD." + default: "01-09-2016" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + type: "string" + required: + - "access_token" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" +- dockerImage: "airbyte/source-trello:0.1.1" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/trello" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Trello Spec" + type: "object" + required: + - "token" + - "key" + - "start_date" + additionalProperties: true + properties: + token: + type: "string" + title: "API token" + description: "A Trello token. See the docs for instructions on how to generate it." + airbyte_secret: true + key: + type: "string" + title: "API key" + description: "A Trello token. See the docs for instructions on how to generate it." + airbyte_secret: true + start_date: + type: "string" + title: "Start date" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z$" + description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ + \ data before this date will not be replicated." + examples: + - "2021-03-01T00:00:00.000Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: [] + oauthFlowInitParameters: [] + oauthFlowOutputParameters: + - - "token" + - - "key" +- dockerImage: "airbyte/source-twilio:0.1.1" + spec: + documentationUrl: "https://hub.docker.com/r/airbyte/source-twilio" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Twilio Spec" + type: "object" + required: + - "account_sid" + - "auth_token" + - "start_date" + additionalProperties: false + properties: + account_sid: + title: "Account ID" + description: "Twilio account SID" + airbyte_secret: true + type: "string" + auth_token: + title: "Auth Token" + description: "Twilio Auth Token." + airbyte_secret: true + type: "string" + start_date: + title: "Replication Start Date" + description: "UTC date and time in the format 2020-10-01T00:00:00Z. Any\ + \ data before this date will not be replicated." + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: + - "2020-10-01T00:00:00Z" + type: "string" + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" +- dockerImage: "airbyte/source-typeform:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/typeform" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Typeform Spec" + type: "object" + required: + - "token" + - "start_date" + additionalProperties: true + properties: + start_date: + type: "string" + description: "The date you would like to replicate data. Format: YYYY-MM-DDTHH:mm:ss[Z]." + examples: + - "2020-01-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + token: + type: "string" + description: "The API Token for a Typeform account." + airbyte_secret: true + form_ids: + title: "Form IDs to replicate" + description: "When this parameter is set, the connector will replicate data\ + \ only from the input forms. Otherwise, all forms in your Typeform account\ + \ will be replicated. 
You can find form IDs in your form URLs. For example,\ + \ in the URL \"https://mysite.typeform.com/to/u6nXL7\" the form_id is\ + \ u6nXL7. You can find form URLs on Share panel" + type: "array" + items: + type: "string" + uniqueItems: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-us-census:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/us-census" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "https://api.census.gov/ Source Spec" + type: "object" + required: + - "api_key" + - "query_path" + additionalProperties: false + properties: + query_params: + type: "string" + description: "The query parameters portion of the GET request, without the\ + \ api key" + pattern: "^\\w+=[\\w,:*]+(&(?!key)\\w+=[\\w,:*]+)*$" + examples: + - "get=NAME,NAICS2017_LABEL,LFO_LABEL,EMPSZES_LABEL,ESTAB,PAYANN,PAYQTR1,EMP&for=us:*&NAICS2017=72&LFO=001&EMPSZES=001" + - "get=MOVEDIN,GEOID1,GEOID2,MOVEDOUT,FULL1_NAME,FULL2_NAME,MOVEDNET&for=county:*" + query_path: + type: "string" + description: "The path portion of the GET request" + pattern: "^data(\\/[\\w\\d]+)+$" + examples: + - "data/2019/cbp" + - "data/2018/acs" + - "data/timeseries/healthins/sahie" + api_key: + type: "string" + description: "Your API Key. Get your key here." + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-chat:0.1.3" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-chat" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Chat Spec" + type: "object" + required: + - "start_date" + - "access_token" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Chat API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-02-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + access_token: + type: "string" + description: "The value of the Access Token generated. See the docs for\ + \ more information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-sunshine:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk_sunshine" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Sunshine Spec" + type: "object" + required: + - "api_token" + - "email" + - "start_date" + - "subdomain" + additionalProperties: false + properties: + api_token: + type: "string" + airbyte_secret: true + description: "API Token. See the docs for information on how to generate this key." 
+ email: + type: "string" + description: "The user email for your Zendesk account" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Account" + start_date: + title: "Start Date" + type: "string" + description: "The date from which you'd like to replicate the data" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + examples: "2021-01-01T00:00:00.000000Z" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-support:0.1.4" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-support" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Source Zendesk Support Spec" + type: "object" + required: + - "start_date" + - "subdomain" + - "auth_method" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Support API, in the format YYYY-MM-DDT00:00:00Z. All data generated\ + \ after this date will be replicated." + examples: + - "2020-10-15T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Support" + auth_method: + title: "ZenDesk Authorization Method" + type: "object" + default: "api_token" + description: "Zendesk service provides 2 auth method: API token and oAuth2.\ + \ Now only the first one is available. Another one will be added in the\ + \ future" + oneOf: + - title: "API Token" + type: "object" + required: + - "email" + - "api_token" + additionalProperties: false + properties: + auth_method: + type: "string" + const: "api_token" + email: + type: "string" + description: "The user email for your Zendesk account" + api_token: + type: "string" + description: "The value of the API token generated. See the docs\ + \ for more information" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-zendesk-talk:0.1.2" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-talk" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Zendesk Talk Spec" + type: "object" + required: + - "start_date" + - "subdomain" + - "access_token" + - "email" + additionalProperties: false + properties: + start_date: + type: "string" + description: "The date from which you'd like to replicate data for Zendesk\ + \ Talk API, in the format YYYY-MM-DDT00:00:00Z." + examples: + - "2021-04-01T00:00:00Z" + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + subdomain: + type: "string" + description: "The subdomain for your Zendesk Talk" + access_token: + type: "string" + description: "The value of the API token generated. 
See the docs for more information"
+          airbyte_secret: true
+        email:
+          type: "string"
+          description: "The user email for your Zendesk account"
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-sentry:0.1.0"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/sources/sentry"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "Sentry Spec"
+      type: "object"
+      required:
+      - "auth_token"
+      - "organization"
+      - "project"
+      additionalProperties: false
+      properties:
+        auth_token:
+          type: "string"
+          title: "Authentication tokens"
+          description: "Log into Sentry and then create authentication tokens. For self-hosted,\
+            \ you can find or create authentication tokens by visiting \"{instance_url_prefix}/settings/account/api/auth-tokens/\""
+          airbyte_secret: true
+        hostname:
+          type: "string"
+          title: "Host Name"
+          description: "Host name of the Sentry API server. For self-hosted, specify your\
+            \ host name here. Otherwise, leave it empty."
+          default: "sentry.io"
+        organization:
+          type: "string"
+          title: "Organization"
+          description: "The slug of the organization the groups belong to."
+        project:
+          type: "string"
+          title: "Project"
+          description: "The slug of the project the groups belong to."
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-zoom-singer:0.2.4"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/sources/zoom"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "Source Zoom Singer Spec"
+      type: "object"
+      required:
+      - "jwt"
+      additionalProperties: false
+      properties:
+        jwt:
+          title: "JWT Token"
+          type: "string"
+          description: "Zoom JWT Token. See the docs for more information on how to obtain this key."
+          airbyte_secret: true
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes: []
+- dockerImage: "airbyte/source-zuora:0.1.3"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/sources/zuora"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "Zuora Connector Configuration"
+      type: "object"
+      required:
+      - "start_date"
+      - "tenant_endpoint"
+      - "data_query"
+      - "client_id"
+      - "client_secret"
+      properties:
+        start_date:
+          type: "string"
+          title: "Start Date"
+          description: "Start Date in format: YYYY-MM-DD"
+          pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$"
+        window_in_days:
+          type: "string"
+          title: "Query Window (in days)"
+          description: "The number of days in each data chunk, beginning from start_date.\
+            \ The bigger the value, the faster the fetch. (0.1 - roughly a couple\
+            \ of hours, 1 - a day; 364 - a year)."
+ examples: + - "0.5" + - "1" + - "30" + - "60" + - "90" + - "120" + - "200" + - "364" + pattern: "^(0|[1-9]\\d*)(\\.\\d+)?$" + default: "90" + tenant_endpoint: + title: "Tenant Endpoint Location" + type: "string" + description: "Please choose the right endpoint where your Tenant is located.\ + \ More info by this Link" + enum: + - "US Production" + - "US Cloud Production" + - "US API Sandbox" + - "US Cloud API Sandbox" + - "US Central Sandbox" + - "US Performance Test" + - "EU Production" + - "EU API Sandbox" + - "EU Central Sandbox" + data_query: + title: "Data Query Type" + type: "string" + description: "Choose between `Live`, or `Unlimited` - the optimized, replicated\ + \ database at 12 hours freshness for high volume extraction Link" + enum: + - "Live" + - "Unlimited" + default: "Live" + client_id: + type: "string" + title: "Client ID" + description: "Your OAuth user Client ID" + airbyte_secret: true + client_secret: + type: "string" + title: "Client Secret" + description: "Your OAuth user Client Secret" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java b/airbyte-config/init/src/test/java/io/airbyte/config/init/YamlSeedConfigPersistenceTest.java similarity index 86% rename from airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java rename to airbyte-config/init/src/test/java/io/airbyte/config/init/YamlSeedConfigPersistenceTest.java index 8a740ba535688..57090570f3e21 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/YamlSeedConfigPersistenceTest.java +++ b/airbyte-config/init/src/test/java/io/airbyte/config/init/YamlSeedConfigPersistenceTest.java @@ -2,7 +2,7 @@ * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
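
For reference, a source config satisfying the new Sentry connectionSpecification added to the seed file above needs the three required fields (auth_token, organization, project) plus the optional hostname. A minimal sketch with placeholder values (the token and slugs below are hypothetical, not taken from this patch):

    {
      "auth_token": "<sentry-auth-token>",
      "organization": "my-org-slug",
      "project": "my-project-slug",
      "hostname": "sentry.io"
    }

Self-hosted Sentry deployments would replace the default "sentry.io" hostname with their own instance host, per the hostname description in the spec.
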
*/ -package io.airbyte.config.persistence; +package io.airbyte.config.init; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -14,7 +14,9 @@ import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardSync; import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.persistence.ConfigNotFoundException; import java.io.IOException; +import java.net.URI; import java.util.Collections; import java.util.Map; import java.util.stream.Stream; @@ -41,6 +43,7 @@ public void testGetConfig() throws Exception { assertEquals("airbyte/source-mysql", mysqlSource.getDockerRepository()); assertEquals("https://docs.airbyte.io/integrations/sources/mysql", mysqlSource.getDocumentationUrl()); assertEquals("mysql.svg", mysqlSource.getIcon()); + assertEquals(URI.create("https://docs.airbyte.io/integrations/sources/mysql"), mysqlSource.getSpec().getDocumentationUrl()); // destination final String s3DestinationId = "4816b78f-1489-44c1-9060-4b19d5fa9362"; @@ -50,13 +53,16 @@ public void testGetConfig() throws Exception { assertEquals("S3", s3Destination.getName()); assertEquals("airbyte/destination-s3", s3Destination.getDockerRepository()); assertEquals("https://docs.airbyte.io/integrations/destinations/s3", s3Destination.getDocumentationUrl()); + assertEquals(URI.create("https://docs.airbyte.io/integrations/destinations/s3"), s3Destination.getSpec().getDocumentationUrl()); } @Test public void testGetInvalidConfig() { - assertThrows(UnsupportedOperationException.class, + assertThrows( + UnsupportedOperationException.class, () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SYNC, "invalid_id", StandardSync.class)); - assertThrows(ConfigNotFoundException.class, + assertThrows( + ConfigNotFoundException.class, () -> PERSISTENCE.getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "invalid_id", StandardWorkspace.class)); } diff --git a/airbyte-config/models/build.gradle b/airbyte-config/models/build.gradle index 271b3fe685e7e..d62c88c7e1633 100644 --- a/airbyte-config/models/build.gradle +++ b/airbyte-config/models/build.gradle @@ -7,10 +7,11 @@ plugins { dependencies { implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') + implementation project(':airbyte-commons') } jsonSchema2Pojo { - sourceType = SourceType.YAMLSCHEMA + sourceType = SourceType.YAMLSCHEMA source = files("${sourceSets.main.output.resourcesDir}/types") targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') diff --git a/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml new file mode 100644 index 0000000000000..0d3becf8e74c6 --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml @@ -0,0 +1,16 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/DockerImageSpec.yaml +title: DockerImageSpec +description: docker image name and the connector specification associated with it +type: object +required: + - dockerImage + - spec +additionalProperties: false +properties: + dockerImage: + type: string + spec: + type: object + existingJavaType: io.airbyte.protocol.models.ConnectorSpecification diff --git a/airbyte-config/persistence/build.gradle b/airbyte-config/persistence/build.gradle index 834f38596c98d..6b072911359db 100644 --- 
a/airbyte-config/persistence/build.gradle +++ b/airbyte-config/persistence/build.gradle @@ -11,7 +11,6 @@ dependencies { implementation project(':airbyte-db:jooq') implementation project(':airbyte-protocol:models') implementation project(':airbyte-config:models') - implementation project(':airbyte-config:init') implementation project(':airbyte-json-validation') implementation 'com.google.cloud:google-cloud-secretmanager:1.7.2' testImplementation "org.testcontainers:postgresql:1.15.3" diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java index 19035ed42295d..2181bcb162640 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java @@ -366,7 +366,14 @@ ConnectorCounter updateConnectorDefinitions(final DSLContext ctx, final ConnectorInfo connectorInfo = connectorRepositoryToIdVersionMap.get(repository); final JsonNode currentDefinition = connectorInfo.definition; - final Set newFields = getNewFields(currentDefinition, latestDefinition); + + // todo (lmossman) - this logic to remove the "spec" field is temporary; it is necessary to avoid + // breaking users who are actively using an old connector version, otherwise specs from the most + // recent connector versions may be inserted into the db which could be incompatible with the + // version they are actually using. + // Once the faux major version bump has been merged, this "new field" logic will be removed + // entirely. + final Set newFields = Sets.difference(getNewFields(currentDefinition, latestDefinition), Set.of("spec")); // Process connector in use if (connectorRepositoriesInUse.contains(repository)) { diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java index 02091e35e0f0a..4dcbf918c9e6c 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/BaseDatabaseConfigPersistenceTest.java @@ -13,10 +13,12 @@ import io.airbyte.config.ConfigSchema; import io.airbyte.config.StandardDestinationDefinition; import io.airbyte.config.StandardSourceDefinition; +import io.airbyte.config.StandardSourceDefinition.SourceType; import io.airbyte.db.Database; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; import org.jooq.Record1; @@ -48,26 +50,34 @@ public static void dbDown() { container.close(); } - protected static final StandardSourceDefinition SOURCE_GITHUB; - protected static final StandardSourceDefinition SOURCE_POSTGRES; - protected static final StandardDestinationDefinition DESTINATION_SNOWFLAKE; - protected static final StandardDestinationDefinition DESTINATION_S3; - - static { - try { - final ConfigPersistence seedPersistence = YamlSeedConfigPersistence.getDefault(); - SOURCE_GITHUB = seedPersistence - .getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "ef69ef6e-aa7f-4af1-a01d-ef775033524e", StandardSourceDefinition.class); - SOURCE_POSTGRES = seedPersistence - 
.getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "decd338e-5647-4c0b-adf4-da0e75f5a750", StandardSourceDefinition.class); - DESTINATION_SNOWFLAKE = seedPersistence - .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "424892c4-daac-4491-b35d-c6688ba547ba", StandardDestinationDefinition.class); - DESTINATION_S3 = seedPersistence - .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "4816b78f-1489-44c1-9060-4b19d5fa9362", StandardDestinationDefinition.class); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } + protected static final StandardSourceDefinition SOURCE_GITHUB = new StandardSourceDefinition() + .withName("GitHub") + .withSourceDefinitionId(UUID.fromString("ef69ef6e-aa7f-4af1-a01d-ef775033524e")) + .withDockerRepository("airbyte/source-github") + .withDockerImageTag("0.2.3") + .withDocumentationUrl("https://docs.airbyte.io/integrations/sources/github") + .withIcon("github.svg") + .withSourceType(SourceType.API); + protected static final StandardSourceDefinition SOURCE_POSTGRES = new StandardSourceDefinition() + .withName("Postgres") + .withSourceDefinitionId(UUID.fromString("decd338e-5647-4c0b-adf4-da0e75f5a750")) + .withDockerRepository("airbyte/source-postgres") + .withDockerImageTag("0.3.11") + .withDocumentationUrl("https://docs.airbyte.io/integrations/sources/postgres") + .withIcon("postgresql.svg") + .withSourceType(SourceType.DATABASE); + protected static final StandardDestinationDefinition DESTINATION_SNOWFLAKE = new StandardDestinationDefinition() + .withName("Snowflake") + .withDestinationDefinitionId(UUID.fromString("424892c4-daac-4491-b35d-c6688ba547ba")) + .withDockerRepository("airbyte/destination-snowflake") + .withDockerImageTag("0.3.16") + .withDocumentationUrl("https://docs.airbyte.io/integrations/destinations/snowflake"); + protected static final StandardDestinationDefinition DESTINATION_S3 = new StandardDestinationDefinition() + .withName("S3") + .withDestinationDefinitionId(UUID.fromString("4816b78f-1489-44c1-9060-4b19d5fa9362")) + .withDockerRepository("airbyte/destination-s3") + .withDockerImageTag("0.1.12") + .withDocumentationUrl("https://docs.airbyte.io/integrations/destinations/s3"); protected static void writeSource(final ConfigPersistence configPersistence, final StandardSourceDefinition source) throws Exception { configPersistence.writeConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, source.getSourceDefinitionId().toString(), source); diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java index 1a94209294d6b..9177115f48865 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceLoadDataTest.java @@ -13,6 +13,7 @@ import static org.mockito.Mockito.when; import com.google.common.collect.Lists; +import io.airbyte.commons.json.Jsons; import io.airbyte.config.ConfigSchema; import io.airbyte.config.DestinationConnection; import io.airbyte.config.SourceConnection; @@ -80,14 +81,10 @@ public void testUpdateConfigsInNonEmptyDatabase() throws Exception { @DisplayName("When a connector is in use, its definition should not be updated") public void testNoUpdateForUsedConnector() throws Exception { // the seed has a newer version of s3 destination and github source - final 
StandardDestinationDefinition destinationS3V2 = YamlSeedConfigPersistence.getDefault() - .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "4816b78f-1489-44c1-9060-4b19d5fa9362", StandardDestinationDefinition.class) - .withDockerImageTag("10000.1.0"); + final StandardDestinationDefinition destinationS3V2 = Jsons.clone(DESTINATION_S3).withDockerImageTag("10000.1.0"); when(seedPersistence.listConfigs(ConfigSchema.STANDARD_DESTINATION_DEFINITION, StandardDestinationDefinition.class)) .thenReturn(Collections.singletonList(destinationS3V2)); - final StandardSourceDefinition sourceGithubV2 = YamlSeedConfigPersistence.getDefault() - .getConfig(ConfigSchema.STANDARD_SOURCE_DEFINITION, "ef69ef6e-aa7f-4af1-a01d-ef775033524e", StandardSourceDefinition.class) - .withDockerImageTag("10000.15.3"); + final StandardSourceDefinition sourceGithubV2 = Jsons.clone(SOURCE_GITHUB).withDockerImageTag("10000.15.3"); when(seedPersistence.listConfigs(ConfigSchema.STANDARD_SOURCE_DEFINITION, StandardSourceDefinition.class)) .thenReturn(Collections.singletonList(sourceGithubV2)); @@ -112,9 +109,7 @@ public void testNoUpdateForUsedConnector() throws Exception { @DisplayName("When a connector is not in use, its definition should be updated") public void testUpdateForUnusedConnector() throws Exception { // the seed has a newer version of snowflake destination - final StandardDestinationDefinition snowflakeV2 = YamlSeedConfigPersistence.getDefault() - .getConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, "424892c4-daac-4491-b35d-c6688ba547ba", StandardDestinationDefinition.class) - .withDockerImageTag("10000.2.0"); + final StandardDestinationDefinition snowflakeV2 = Jsons.clone(DESTINATION_SNOWFLAKE).withDockerImageTag("10000.2.0"); when(seedPersistence.listConfigs(ConfigSchema.STANDARD_DESTINATION_DEFINITION, StandardDestinationDefinition.class)) .thenReturn(Collections.singletonList(snowflakeV2)); diff --git a/airbyte-config/specs/README.md b/airbyte-config/specs/README.md new file mode 100644 index 0000000000000..8d043e1ec9729 --- /dev/null +++ b/airbyte-config/specs/README.md @@ -0,0 +1,16 @@ +# Generating Seed Connector Specs + +The catalog of seeded connector definitions is stored and manually updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` +files. These manually-maintained connector definitions intentionally _do not_ contain the connector specs, in an effort to keep these files +human-readable and easily-editable, and because specs can be automatically fetched. + +This automatic fetching of connector specs is the goal of the SeedConnectorSpecGenerator. This class reads the connector definitions in +the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, fetches the corresponding specs from the GCS bucket cache, and writes the +specs to the `airbyte-config/init/src/main/resources/seed/*_specs.yaml` files. See the +[SeedConnectorSpecGenerator](src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java) class for more details. + +Therefore, whenever a connector definition is updated in the `airbyte-config/init/src/main/resources/seed/*_definitions.yaml` files, the +SeedConnectorSpecGenerator should be re-ran to generate the updated connector specs files. To do so, +run `./gradlew :airbyte-config:init:processResources`, or just build the platform project, and commit the changes to your PR. 
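+
+For illustration only, a generated entry in `source_specs.yaml` pairs a connector's docker image with the spec fetched for it. The entry below is a minimal, hypothetical sketch of that shape; the image name, documentation URL, and `api_key` property are invented, and real entries carry each connector's full `connectionSpecification`:
+
+```yaml
+# This file is generated by io.airbyte.config.specs.SeedConnectorSpecGenerator.
+# Do NOT edit this file directly. See generator class for more details.
+- dockerImage: "airbyte/source-example:0.1.0" # hypothetical connector image
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/sources/example"
+    connectionSpecification:
+      "$schema": "http://json-schema.org/draft-07/schema#"
+      title: "Example Source Spec"
+      type: "object"
+      required:
+        - "api_key"
+      properties:
+        api_key:
+          type: "string"
+          airbyte_secret: true
+    supportsNormalization: false
+    supportsDBT: false
+    supported_destination_sync_modes: []
+```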
If you do not do this, +the build in the CI will fail because there will be a diff in the generated files as you have not checked in the changes that were applied by the +generator. diff --git a/airbyte-config/specs/build.gradle b/airbyte-config/specs/build.gradle new file mode 100644 index 0000000000000..91d1fd0921706 --- /dev/null +++ b/airbyte-config/specs/build.gradle @@ -0,0 +1,24 @@ +plugins { + id 'java' +} + +dependencies { + implementation 'commons-cli:commons-cli:1.4' + + implementation project(':airbyte-commons') + implementation project(':airbyte-commons-cli') + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-json-validation') +} + +task generateSeedConnectorSpecs(type: JavaExec, dependsOn: compileJava) { + classpath = sourceSets.main.runtimeClasspath + + mainClass = 'io.airbyte.config.specs.SeedConnectorSpecGenerator' + + args '--seed-root' + args new File(project(":airbyte-config:init").projectDir, '/src/main/resources/seed') +} + +project(":airbyte-config:init").tasks.processResources.dependsOn(generateSeedConnectorSpecs) diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java new file mode 100644 index 0000000000000..832326c551c46 --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/GcsBucketSpecFetcher.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.api.client.util.Preconditions; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteProtocolSchema; +import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import io.airbyte.validation.json.JsonValidationException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class GcsBucketSpecFetcher { + + private static final Logger LOGGER = LoggerFactory.getLogger(GcsBucketSpecFetcher.class); + + private final Storage storage; + private final String bucketName; + + public GcsBucketSpecFetcher(final Storage storage, final String bucketName) { + this.storage = storage; + this.bucketName = bucketName; + } + + public String getBucketName() { + return bucketName; + } + + public Optional attemptFetch(final String dockerImage) { + final String[] dockerImageComponents = dockerImage.split(":"); + Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalidate docker image: " + dockerImage); + final String dockerImageName = dockerImageComponents[0]; + final String dockerImageTag = dockerImageComponents[1]; + + final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); + LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); + final Blob specAsBlob = storage.get(bucketName, specPath.toString()); + + // if null it means the object was not found. 
+ if (specAsBlob == null) { + LOGGER.debug("Spec not found in bucket storage"); + return Optional.empty(); + } + + final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); + try { + validateConfig(Jsons.deserialize(specAsString)); + } catch (final JsonValidationException e) { + LOGGER.error("Received invalid spec from bucket store. {}", e.toString()); + return Optional.empty(); + } + return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); + } + + private static void validateConfig(final JsonNode json) throws JsonValidationException { + final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); + final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); + jsonSchemaValidator.ensure(specJsonSchema, json); + } + +} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java new file mode 100644 index 0000000000000..980772ccf6ea1 --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorSpecGenerator.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.storage.StorageOptions; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.cli.Clis; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.commons.yaml.Yamls; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.EnvConfigs; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This script is responsible for ensuring that up-to-date {@link ConnectorSpecification}s for every + * connector definition in the seed are stored in a corresponding resource file, for the purpose of + * seeding the specs into the config database on server startup. See + * ./airbyte-config/specs/readme.md for more details on how this class is run and how it fits into + * the project. + *
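+ * Within the configured spec cache bucket, each spec is expected at the object path specs/<dockerRepository>/<dockerImageTag>/spec.json
+ * (see {@link GcsBucketSpecFetcher}).
+ *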

+ * Specs are stored in a separate file from the definitions in an effort to keep the definitions + * yaml files human-readable and easily-editable, as specs can be rather large. + *

+ * Specs are fetched from the GCS spec cache bucket, so if any specs are missing from the bucket + * then this will fail. Note that this script only pulls specs from the bucket cache; it never + * pushes specs to the bucket. Since this script runs at build time, the decision was to depend on + * the bucket cache rather than running a docker container to fetch the spec during the build which + * could be slow and unwieldy. If there is a failure, check the bucket cache and figure out how to + * get the correct spec in there. + */ +public class SeedConnectorSpecGenerator { + + private static final String DOCKER_REPOSITORY_FIELD = "dockerRepository"; + private static final String DOCKER_IMAGE_TAG_FIELD = "dockerImageTag"; + private static final String DOCKER_IMAGE_FIELD = "dockerImage"; + private static final String SPEC_FIELD = "spec"; + private static final String SPEC_BUCKET_NAME = new EnvConfigs().getSpecCacheBucket(); + + private static final Logger LOGGER = LoggerFactory.getLogger(SeedConnectorSpecGenerator.class); + + private static final Option SEED_ROOT_OPTION = Option.builder("s").longOpt("seed-root").hasArg(true).required(true) + .desc("path to where seed resource files are stored").build(); + private static final Options OPTIONS = new Options().addOption(SEED_ROOT_OPTION); + + private final GcsBucketSpecFetcher bucketSpecFetcher; + + public SeedConnectorSpecGenerator(final GcsBucketSpecFetcher bucketSpecFetcher) { + this.bucketSpecFetcher = bucketSpecFetcher; + } + + public static void main(final String[] args) throws Exception { + final CommandLine parsed = Clis.parse(args, OPTIONS); + final Path outputRoot = Path.of(parsed.getOptionValue(SEED_ROOT_OPTION.getOpt())); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), SPEC_BUCKET_NAME); + final SeedConnectorSpecGenerator seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcher); + seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.SOURCE); + seedConnectorSpecGenerator.run(outputRoot, SeedConnectorType.DESTINATION); + } + + public void run(final Path seedRoot, final SeedConnectorType seedConnectorType) throws IOException { + LOGGER.info("Updating seeded {} definition specs if necessary...", seedConnectorType.name()); + + final JsonNode seedDefinitionsJson = yamlToJson(seedRoot, seedConnectorType.getDefinitionFileName()); + final JsonNode seedSpecsJson = yamlToJson(seedRoot, seedConnectorType.getSpecFileName()); + + final List updatedSeedSpecs = fetchUpdatedSeedSpecs(seedDefinitionsJson, seedSpecsJson); + + final String outputString = String.format("# This file is generated by %s.\n", this.getClass().getName()) + + "# Do NOT edit this file directly. 
See generator class for more details.\n" + + Yamls.serialize(updatedSeedSpecs); + final Path outputPath = IOs.writeFile(seedRoot.resolve(seedConnectorType.getSpecFileName()), outputString); + + LOGGER.info("Finished updating {}", outputPath); + } + + private JsonNode yamlToJson(final Path root, final String fileName) { + final String yamlString = IOs.readFile(root, fileName); + return Yamls.deserialize(yamlString); + } + + @VisibleForTesting + final List fetchUpdatedSeedSpecs(final JsonNode seedDefinitions, final JsonNode currentSeedSpecs) { + final List seedDefinitionsDockerImages = MoreIterators.toList(seedDefinitions.elements()) + .stream() + .map(json -> String.format("%s:%s", json.get(DOCKER_REPOSITORY_FIELD).asText(), json.get(DOCKER_IMAGE_TAG_FIELD).asText())) + .collect(Collectors.toList()); + + final Map currentSeedImageToSpec = MoreIterators.toList(currentSeedSpecs.elements()) + .stream() + .collect(Collectors.toMap( + json -> json.get(DOCKER_IMAGE_FIELD).asText(), + json -> new DockerImageSpec().withDockerImage(json.get(DOCKER_IMAGE_FIELD).asText()) + .withSpec(Jsons.object(json.get(SPEC_FIELD), ConnectorSpecification.class)))); + + return seedDefinitionsDockerImages + .stream() + .map(dockerImage -> currentSeedImageToSpec.containsKey(dockerImage) ? currentSeedImageToSpec.get(dockerImage) : fetchSpecFromGCS(dockerImage)) + .collect(Collectors.toList()); + } + + private DockerImageSpec fetchSpecFromGCS(final String dockerImage) { + LOGGER.info("Seeded spec not found for docker image {} - fetching from GCS bucket {}...", dockerImage, bucketSpecFetcher.getBucketName()); + final ConnectorSpecification spec = bucketSpecFetcher.attemptFetch(dockerImage) + .orElseThrow(() -> new RuntimeException(String.format( + "Failed to fetch valid spec file for docker image %s from GCS bucket %s. This will continue to fail until the connector change has been approved and published. See https://github.com/airbytehq/airbyte/tree/master/docs/connector-development#publishing-a-connector for more details.", + dockerImage, + bucketSpecFetcher.getBucketName()))); + return new DockerImageSpec().withDockerImage(dockerImage).withSpec(spec); + } + +} diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java new file mode 100644 index 0000000000000..36d1326af215b --- /dev/null +++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/SeedConnectorType.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.config.specs; + +public enum SeedConnectorType { + + SOURCE( + "source_definitions.yaml", + "source_specs.yaml"), + DESTINATION( + "destination_definitions.yaml", + "destination_specs.yaml"); + + private final String definitionFileName; + private final String specFileName; + + SeedConnectorType(final String definitionFileName, + final String specFileName) { + this.definitionFileName = definitionFileName; + this.specFileName = specFileName; + } + + public String getDefinitionFileName() { + return definitionFileName; + } + + public String getSpecFileName() { + return specFileName; + } + +} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java new file mode 100644 index 0000000000000..25e16bea545bf --- /dev/null +++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/GcsBucketSpecFetcherTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class GcsBucketSpecFetcherTest { + + private static final String BUCKET_NAME = "bucket"; + private static final String DOCKER_REPOSITORY = "image"; + private static final String DOCKER_IMAGE_TAG = "0.1.0"; + private static final String DOCKER_IMAGE = DOCKER_REPOSITORY + ":" + DOCKER_IMAGE_TAG; + private static final String SPEC_PATH = Path.of("specs").resolve(DOCKER_REPOSITORY).resolve(DOCKER_IMAGE_TAG).resolve("spec.json").toString(); + + private Storage storage; + private Blob specBlob; + private final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); + + @SuppressWarnings("unchecked") + @BeforeEach + void setup() throws IOException { + storage = mock(Storage.class); + + final byte[] specBytes = Jsons.toBytes(Jsons.jsonNode(spec)); + specBlob = mock(Blob.class); + when(specBlob.getContent()).thenReturn(specBytes); + } + + @Test + void testGetsSpecIfPresent() throws IOException { + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(specBlob); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isPresent()); + assertEquals(spec, returnedSpec.get()); + } + + @Test + void testReturnsEmptyIfNotPresent() throws IOException { + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(null); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isEmpty()); + } + + @Test + void testReturnsEmptyIfInvalidSpec() throws IOException { + final Blob invalidSpecBlob = mock(Blob.class); + 
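+ // The stubbed blob returns JSON that is not a valid ConnectorSpecification, so attemptFetch should reject it and return an empty Optional.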
when(invalidSpecBlob.getContent()).thenReturn("{\"notASpec\": true}".getBytes(StandardCharsets.UTF_8)); + when(storage.get(BUCKET_NAME, SPEC_PATH)).thenReturn(invalidSpecBlob); + + final GcsBucketSpecFetcher bucketSpecFetcher = new GcsBucketSpecFetcher(storage, BUCKET_NAME); + final Optional returnedSpec = bucketSpecFetcher.attemptFetch(DOCKER_IMAGE); + + assertTrue(returnedSpec.isEmpty()); + } + +} diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java new file mode 100644 index 0000000000000..0925608a2f62f --- /dev/null +++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/SeedConnectorSpecGeneratorTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.config.specs; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class SeedConnectorSpecGeneratorTest { + + private static final UUID DEF_ID1 = java.util.UUID.randomUUID(); + private static final UUID DEF_ID2 = java.util.UUID.randomUUID(); + private static final String CONNECTOR_NAME1 = "connector1"; + private static final String CONNECTOR_NAME2 = "connector2"; + private static final String DOCUMENTATION_URL = "https://wwww.example.com"; + private static final String DOCKER_REPOSITORY1 = "airbyte/connector1"; + private static final String DOCKER_REPOSITORY2 = "airbyte/connector2"; + private static final String DOCKER_TAG1 = "0.1.0"; + private static final String DOCKER_TAG2 = "0.2.0"; + private static final String BUCKET_NAME = "bucket"; + + private SeedConnectorSpecGenerator seedConnectorSpecGenerator; + private GcsBucketSpecFetcher bucketSpecFetcherMock; + + @BeforeEach + void setup() { + bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); + when(bucketSpecFetcherMock.getBucketName()).thenReturn(BUCKET_NAME); + + seedConnectorSpecGenerator = new SeedConnectorSpecGenerator(bucketSpecFetcherMock); + } + + @Test + void testMissingSpecIsFetched() { + final StandardDestinationDefinition sourceDefinition1 = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); + final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); + + final StandardDestinationDefinition sourceDefinition2 = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID2) + .withDockerRepository(DOCKER_REPOSITORY2) + .withDockerImageTag(DOCKER_TAG2) + 
.withName(CONNECTOR_NAME2) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); + + final JsonNode seedDefinitions = Jsons.jsonNode(Arrays.asList(sourceDefinition1, sourceDefinition2)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(dockerImageSpec1)); + + when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(spec2)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = Arrays.asList(dockerImageSpec1, dockerImageSpec2); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testOutdatedSpecIsFetched() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG2) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification outdatedSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of( + "foo1", + "bar1"))); + final DockerImageSpec outdatedDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1) + .withSpec(outdatedSpec); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(outdatedDockerImageSpec)); + + final ConnectorSpecification newSpec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec newDockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2).withSpec(newSpec); + + when(bucketSpecFetcherMock.attemptFetch(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG2)).thenReturn(Optional.of(newSpec)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(newDockerImageSpec); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testExtraneousSpecIsRemoved() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec1 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo1", "bar1"))); + final DockerImageSpec dockerImageSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec1); + + final ConnectorSpecification spec2 = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo2", "bar2"))); + final DockerImageSpec dockerImageSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2).withSpec(spec2); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(Arrays.asList(dockerImageSpec1, dockerImageSpec2)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List 
expectedSeedSpecs = List.of(dockerImageSpec1); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + } + + @Test + void testNoFetchIsPerformedIfAllSpecsUpToDate() { + final StandardDestinationDefinition sourceDefinition = new StandardDestinationDefinition() + .withDestinationDefinitionId(DEF_ID1) + .withDockerRepository(DOCKER_REPOSITORY1) + .withDockerImageTag(DOCKER_TAG1) + .withName(CONNECTOR_NAME1) + .withDocumentationUrl(DOCUMENTATION_URL); + final ConnectorSpecification spec = new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of("foo", "bar"))); + final DockerImageSpec dockerImageSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1).withSpec(spec); + + final JsonNode seedDefinitions = Jsons.jsonNode(List.of(sourceDefinition)); + final JsonNode seedSpecs = Jsons.jsonNode(List.of(dockerImageSpec)); + + final List actualSeedSpecs = seedConnectorSpecGenerator.fetchUpdatedSeedSpecs(seedDefinitions, seedSpecs); + final List expectedSeedSpecs = List.of(dockerImageSpec); + + assertEquals(expectedSeedSpecs, actualSeedSpecs); + verify(bucketSpecFetcherMock, never()).attemptFetch(any()); + } + +} diff --git a/airbyte-scheduler/client/build.gradle b/airbyte-scheduler/client/build.gradle index d90a0262c97c7..5e319c0418efa 100644 --- a/airbyte-scheduler/client/build.gradle +++ b/airbyte-scheduler/client/build.gradle @@ -5,6 +5,7 @@ plugins { dependencies { implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') + implementation project(':airbyte-config:specs') implementation project(':airbyte-json-validation') implementation project(':airbyte-protocol:models') implementation project(':airbyte-scheduler:models') diff --git a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java index a615643d0830e..bcdc972c2cb4a 100644 --- a/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java +++ b/airbyte-scheduler/client/src/main/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClient.java @@ -4,27 +4,17 @@ package io.airbyte.scheduler.client; -import com.fasterxml.jackson.databind.JsonNode; -import com.google.api.client.util.Preconditions; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import com.google.common.annotations.VisibleForTesting; -import io.airbyte.commons.json.Jsons; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.SourceConnection; import io.airbyte.config.StandardCheckConnectionOutput; +import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.AirbyteCatalog; -import io.airbyte.protocol.models.AirbyteProtocolSchema; import io.airbyte.protocol.models.ConnectorSpecification; -import io.airbyte.validation.json.JsonSchemaValidator; -import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; import java.util.Optional; -import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,17 +23,15 @@ public class BucketSpecCacheSchedulerClient implements SynchronousSchedulerClien private static final Logger LOGGER = 
LoggerFactory.getLogger(BucketSpecCacheSchedulerClient.class); private final SynchronousSchedulerClient client; - private final Function> bucketSpecFetcher; + private final GcsBucketSpecFetcher bucketSpecFetcher; public BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final String bucketName) { - this( - client, - dockerImage -> attemptToFetchSpecFromBucket(StorageOptions.getDefaultInstance().getService(), bucketName, dockerImage)); + this.client = client; + this.bucketSpecFetcher = new GcsBucketSpecFetcher(StorageOptions.getDefaultInstance().getService(), bucketName); } @VisibleForTesting - BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, - final Function> bucketSpecFetcher) { + BucketSpecCacheSchedulerClient(final SynchronousSchedulerClient client, final GcsBucketSpecFetcher bucketSpecFetcher) { this.client = client; this.bucketSpecFetcher = bucketSpecFetcher; } @@ -72,7 +60,7 @@ public SynchronousResponse createGetSpecJob(final String Optional cachedSpecOptional; // never want to fail because we could not fetch from off board storage. try { - cachedSpecOptional = bucketSpecFetcher.apply(dockerImage); + cachedSpecOptional = bucketSpecFetcher.attemptFetch(dockerImage); LOGGER.debug("Spec bucket cache: Call to cache did not fail."); } catch (final RuntimeException e) { cachedSpecOptional = Optional.empty(); @@ -88,38 +76,4 @@ public SynchronousResponse createGetSpecJob(final String } } - private static void validateConfig(final JsonNode json) throws JsonValidationException { - final JsonSchemaValidator jsonSchemaValidator = new JsonSchemaValidator(); - final JsonNode specJsonSchema = JsonSchemaValidator.getSchema(AirbyteProtocolSchema.PROTOCOL.getFile(), "ConnectorSpecification"); - jsonSchemaValidator.ensure(specJsonSchema, json); - } - - public static Optional attemptToFetchSpecFromBucket(final Storage storage, - final String bucketName, - final String dockerImage) { - final String[] dockerImageComponents = dockerImage.split(":"); - Preconditions.checkArgument(dockerImageComponents.length == 2, "Invalidate docker image: " + dockerImage); - final String dockerImageName = dockerImageComponents[0]; - final String dockerImageTag = dockerImageComponents[1]; - - final Path specPath = Path.of("specs").resolve(dockerImageName).resolve(dockerImageTag).resolve("spec.json"); - LOGGER.debug("Checking path for cached spec: {} {}", bucketName, specPath); - final Blob specAsBlob = storage.get(bucketName, specPath.toString()); - - // if null it means the object was not found. - if (specAsBlob == null) { - LOGGER.debug("Spec not found in bucket storage"); - return Optional.empty(); - } - - final String specAsString = new String(specAsBlob.getContent(), StandardCharsets.UTF_8); - try { - validateConfig(Jsons.deserialize(specAsString)); - } catch (final JsonValidationException e) { - LOGGER.error("Received invalid spec from bucket store. 
{}", e.toString()); - return Optional.empty(); - } - return Optional.of(Jsons.deserialize(specAsString, ConnectorSpecification.class)); - } - } diff --git a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java index cf21fd2b160df..01f4595b94685 100644 --- a/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java +++ b/airbyte-scheduler/client/src/test/java/io/airbyte/scheduler/client/BucketSpecCacheSchedulerClientTest.java @@ -10,10 +10,10 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; +import io.airbyte.config.specs.GcsBucketSpecFetcher; import io.airbyte.protocol.models.ConnectorSpecification; import java.io.IOException; import java.util.Optional; -import java.util.function.Function; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -21,18 +21,18 @@ class BucketSpecCacheSchedulerClientTest { private SynchronousSchedulerClient defaultClientMock; - private Function> bucketSpecFetcherMock; + private GcsBucketSpecFetcher bucketSpecFetcherMock; @SuppressWarnings("unchecked") @BeforeEach void setup() { defaultClientMock = mock(SynchronousSchedulerClient.class); - bucketSpecFetcherMock = mock(Function.class); + bucketSpecFetcherMock = mock(GcsBucketSpecFetcher.class); } @Test void testGetsSpecIfPresent() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); assertEquals(new ConnectorSpecification(), client.createGetSpecJob("source-pokeapi:0.1.0").getOutput()); verifyNoInteractions(defaultClientMock); @@ -40,7 +40,7 @@ void testGetsSpecIfPresent() throws IOException { @Test void testCallsDelegateIfNotPresent() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.empty()); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -49,7 +49,7 @@ void testCallsDelegateIfNotPresent() throws IOException { @Test void testCallsDelegateIfException() throws IOException { - when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenThrow(new RuntimeException("induced exception")); when(defaultClientMock.createGetSpecJob("source-pokeapi:0.1.0")) .thenReturn(new SynchronousResponse<>(new ConnectorSpecification(), mock(SynchronousJobMetadata.class))); final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, bucketSpecFetcherMock); @@ -62,7 +62,7 @@ void testCallsDelegateIfException() throws IOException { @Disabled @Test void testGetsSpecFromBucket() throws IOException { - 
when(bucketSpecFetcherMock.apply("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); + when(bucketSpecFetcherMock.attemptFetch("source-pokeapi:0.1.0")).thenReturn(Optional.of(new ConnectorSpecification())); // todo (cgardens) - replace with prod bucket. final BucketSpecCacheSchedulerClient client = new BucketSpecCacheSchedulerClient(defaultClientMock, "cg-specs"); final ConnectorSpecification actualSpec = client.createGetSpecJob("source-pokeapi:0.1.0").getOutput(); diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle index edc7c55fb2506..a38db0edc5dbe 100644 --- a/airbyte-server/build.gradle +++ b/airbyte-server/build.gradle @@ -66,6 +66,7 @@ dependencies { implementation project(':airbyte-config:init') implementation project(':airbyte-config:models') implementation project(':airbyte-config:persistence') + implementation project(':airbyte-config:specs') implementation project(':airbyte-db:lib') implementation project(":airbyte-json-validation") implementation project(':airbyte-migration') diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 271c7d6bf8856..83eaf151822e8 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -15,10 +15,10 @@ import io.airbyte.config.EnvConfigs; import io.airbyte.config.StandardWorkspace; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.YamlSeedConfigPersistence; import io.airbyte.config.persistence.split_secrets.SecretPersistence; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.db.Database; diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java index 8a91172c99936..396820fed21d3 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/ArchiveHandlerTest.java @@ -27,10 +27,10 @@ import io.airbyte.config.StandardDestinationDefinition; import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.YamlSeedConfigPersistence; import io.airbyte.config.persistence.split_secrets.NoOpSecretsHydrator; import io.airbyte.db.Database; import io.airbyte.db.instance.test.TestDatabaseProviders; @@ -319,13 +319,16 @@ private void assertSameConfigDump(final Map> expected, final Set expectedRecords = expected.get(stream).collect(Collectors.toSet()); final Set actualRecords = actual.get(stream).collect(Collectors.toSet()); for (final var expectedRecord : expectedRecords) { - assertTrue(actualRecords.contains(expectedRecord), - String.format("\n Expected record was not found:\n%s\n Actual records were:\n%s\n", + assertTrue( + actualRecords.contains(expectedRecord), + String.format( + "\n Expected record was not found:\n%s\n Actual 
records were:\n%s\n", expectedRecord, Strings.join(actualRecords, "\n"))); } assertEquals(expectedRecords.size(), actualRecords.size(), - String.format("The expected vs actual records does not match:\n expected records:\n%s\n actual records\n%s\n", + String.format( + "The expected vs actual records does not match:\n expected records:\n%s\n actual records\n%s\n", Strings.join(expectedRecords, "\n"), Strings.join(actualRecords, "\n"))); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java b/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java index 017eab48404af..962b4509fec7e 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/migration/RunMigrationTest.java @@ -27,11 +27,11 @@ import io.airbyte.config.StandardSyncOperation; import io.airbyte.config.StandardSyncOperation.OperatorType; import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigNotFoundException; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.config.persistence.DatabaseConfigPersistence; -import io.airbyte.config.persistence.YamlSeedConfigPersistence; import io.airbyte.config.persistence.split_secrets.MemorySecretPersistence; import io.airbyte.config.persistence.split_secrets.NoOpSecretsHydrator; import io.airbyte.config.persistence.split_secrets.SecretPersistence; diff --git a/build.gradle b/build.gradle index 271b27c23feb4..39d9957cd26b1 100644 --- a/build.gradle +++ b/build.gradle @@ -73,7 +73,8 @@ def createSpotlessTarget = { pattern -> 'normalization_test_output', 'tools', 'secrets', - 'charts' // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. + 'charts', // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately. + 'resources/seed/*_specs.yaml' ] if (System.getenv().containsKey("SUB_BUILD")) { diff --git a/docs/connector-development/README.md b/docs/connector-development/README.md index fe8ce35eb402c..ebf9705e57843 100644 --- a/docs/connector-development/README.md +++ b/docs/connector-development/README.md @@ -107,14 +107,8 @@ The steps for updating an existing connector are the same as for building a new Once you've finished iterating on the changes to a connector as specified in its `README.md`, follow these instructions to ship the new version of the connector with Airbyte out of the box. 1. Bump the version in the `Dockerfile` of the connector \(`LABEL io.airbyte.version=X.X.X`\). -2. Update the connector definition in the Airbyte connector index to use the new version: - * `airbyte-config/init/src/main/resources/seed/source_definitions.yaml` if it is a source - * `airbyte-config/init/src/main/resources/seed/destination_definitions.yaml` if it is a destination. -3. Update the connector JSON definition. To find the appropriate JSON file to update, find a JSON file `.json` where the UUID portion is the ID specified in the YAML file you modified in step 2. The relevant directories are: - * `airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/.json` for sources - * `airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/.json` for destinations -4. Submit a PR containing the changes you made. -5. 
One of Airbyte maintainers will review the change and publish the new version of the connector to Docker hub. Triggering tests and publishing connectors can be done by leaving a comment on the PR with the following format \(the PR must be from the Airbyte repo, not a fork\): +2. Submit a PR containing the changes you made. +3. One of Airbyte maintainers will review the change and publish the new version of the connector to Docker hub. Triggering tests and publishing connectors can be done by leaving a comment on the PR with the following format \(the PR must be from the Airbyte repo, not a fork\): ```text # to run integration tests for the connector @@ -125,8 +119,13 @@ Once you've finished iterating on the changes to a connector as specified in its # Example: /publish connector=connectors/source-hubspot /publish connector=(connectors|bases)/ ``` - -6. The new version of the connector is now available for everyone who uses it. Thank you! +4. Update the connector definition in the Airbyte connector index to use the new version: + * `airbyte-config/init/src/main/resources/seed/source_definitions.yaml` if it is a source + * `airbyte-config/init/src/main/resources/seed/destination_definitions.yaml` if it is a destination. + + Then rebuild the platform to generate the seed spec yaml files, and commit the changes to the PR. See [this readme](https://github.com/airbytehq/airbyte/tree/a534bb2a8f29b20e3cc7c52fef1bc3c34783695d/airbyte-config/specs) for more information. + +5. The new version of the connector is now available for everyone who uses it. Thank you! ## Using credentials in CI diff --git a/settings.gradle b/settings.gradle index 207c947b3895a..a263a988dde73 100644 --- a/settings.gradle +++ b/settings.gradle @@ -20,12 +20,12 @@ sourceControl { rootProject.name = 'airbyte' // SUB_BUILD is an enum of , PLATFORM, CONNECTORS_BASE. Blank is equivalent to all. -if(!System.getenv().containsKey("SUB_BUILD")) { +if (!System.getenv().containsKey("SUB_BUILD")) { println("Building all of Airbyte.") } else { def subBuild = System.getenv().get("SUB_BUILD") println("Building Airbyte Sub Build: " + subBuild) - if(subBuild != "PLATFORM" && subBuild != "CONNECTORS_BASE") { + if (subBuild != "PLATFORM" && subBuild != "CONNECTORS_BASE") { throw new IllegalArgumentException(String.format("%s is invalid. Must be unset or PLATFORM or CONNECTORS_BASE", subBuild)) } } @@ -46,7 +46,6 @@ include ':airbyte-test-utils' // airbyte-workers has a lot of dependencies. include ':airbyte-workers' // reused by acceptance tests in connector base. include ':airbyte-analytics' // transitively used by airbyte-workers. -include ':airbyte-config:init' // transitively used by airbyte-workers. include ':airbyte-config:persistence' // transitively used by airbyte-workers. include ':airbyte-db:jooq' // transitively used by airbyte-workers. include ':airbyte-notification' // transitively used by airbyte-workers. @@ -54,7 +53,7 @@ include ':airbyte-scheduler:models' // transitively used by airbyte-workers. include ':airbyte-scheduler:persistence' // used by airbyte-workers. 
// platform -if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") { +if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") { include ':airbyte-cli' include ':airbyte-e2e-testing' include ':airbyte-migration' @@ -64,10 +63,12 @@ if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") include ':airbyte-server' include ':airbyte-tests' include ':airbyte-webapp' + include ':airbyte-config:init' + include ':airbyte-config:specs' } // connectors base -if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "CONNECTORS_BASE") { +if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "CONNECTORS_BASE") { include ':airbyte-cdk:python' include ':airbyte-integrations:bases:airbyte-protocol' include ':airbyte-integrations:bases:base' @@ -103,7 +104,7 @@ if(!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") } // connectors -if(!System.getenv().containsKey("SUB_BUILD")) { +if (!System.getenv().containsKey("SUB_BUILD")) { // include all connector projects def integrationsPath = rootDir.toPath().resolve('airbyte-integrations/connectors') println integrationsPath From b9e445a491e562b84de7323f36c2e469412712dd Mon Sep 17 00:00:00 2001 From: lmossman Date: Thu, 4 Nov 2021 17:25:40 -0700 Subject: [PATCH 49/83] regenerate specs --- .../init/src/main/resources/seed/source_specs.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index ced64ea39fcab..a7157e93bc41c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1229,7 +1229,7 @@ oauthFlowInitParameters: [] oauthFlowOutputParameters: - - "access_token" -- dockerImage: "airbyte/source-facebook-pages:0.1.2" +- dockerImage: "airbyte/source-facebook-pages:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-pages" connectionSpecification: @@ -1239,14 +1239,16 @@ required: - "access_token" - "page_id" - additionalProperties: false + additionalProperties: true properties: access_token: type: "string" + title: "Page Access Token" description: "Facebook Page Access Token" airbyte_secret: true page_id: type: "string" + title: "Page ID" description: "Page ID" supportsNormalization: false supportsDBT: false @@ -2447,7 +2449,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-iterable:0.1.10" +- dockerImage: "airbyte/source-iterable:0.1.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/iterable" connectionSpecification: @@ -3441,7 +3443,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-mysql:0.4.8" +- dockerImage: "airbyte/source-mysql:0.4.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mysql" connectionSpecification: @@ -3493,7 +3495,7 @@ description: "Encrypt data using SSL." type: "boolean" default: true - order: 7 + order: 6 replication_method: type: "string" title: "Replication Method" @@ -3502,7 +3504,7 @@ \ able to represent deletions incrementally. CDC uses the Binlog to detect\ \ inserts, updates, and deletes. This needs to be configured on the source\ \ database itself." 
- order: 6 + order: 7 default: "STANDARD" enum: - "STANDARD" From e7a1acc3cae406c0150c7bcbf3981461aa4961cb Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 4 Nov 2021 17:32:03 -0700 Subject: [PATCH 50/83] Bump Airbyte version from 0.30.30-alpha to 0.30.31-alpha (#7662) Co-authored-by: jrhizor --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 3d4b2e2d1b4d1..5b463ca898bed 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.30-alpha +current_version = 0.30.31-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index cab5426a970c0..ece63bb799b42 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.30-alpha +VERSION=0.30.31-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 93013fabec3f3..f4cb9b7d537ad 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.30-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 2538836be92a8..ec7aefe8c4c11 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.30-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index c71bd0c6670f2..1c99dcdcf0498 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.30-alpha", + "version": "0.30.31-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index efb541fe5803a..814b84907962c 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.30-alpha", + "version": "0.30.31-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 
1f8af0b48530f..030bccd8f1f27 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.30-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index f3c1543c90491..fd030c9bf6ffc 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.30-alpha" +appVersion: "0.30.31-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index be62e41092d22..42c1706c94713 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.30.31-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.30-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 24ddd1f04c1a9..051a5b3b61300 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.30-alpha + tag: 0.30.31-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.30-alpha + tag: 0.30.31-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.30-alpha + tag: 0.30.31-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.30-alpha + tag: 0.30.31-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 6cc26a272052b..2b01414738a4b 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.30-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.31-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 9fa79fe31efc7..012c2866c7644 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.30-alpha +AIRBYTE_VERSION=0.30.31-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index e28d687e012cf..6a7c6f06e0050 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/scheduler - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/server - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/webapp - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/worker - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 9fa79fe31efc7..012c2866c7644 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.30-alpha +AIRBYTE_VERSION=0.30.31-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 0434fee12cbfb..534696d1ad17f 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/scheduler - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/server - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/webapp - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: airbyte/worker - newTag: 0.30.30-alpha + newTag: 0.30.31-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 93afe184f3b2fc70920ff57c54b203d1ce905b88 Mon Sep 17 00:00:00 2001 From: Abhi Vaidyanatha Date: Thu, 4 Nov 2021 22:26:06 -0700 Subject: [PATCH 51/83] Remove onboarding call reference. (#7665) --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index aa1b60a45f528..719944ec993e8 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,6 @@ Now visit [http://localhost:8000](http://localhost:8000) Here is a [step-by-step guide](https://github.com/airbytehq/airbyte/tree/e378d40236b6a34e1c1cb481c8952735ec687d88/docs/quickstart/getting-started.md) showing you how to load data from an API into a file, all on your computer. -If you want to schedule a 20-min call with our team to help you get set up, please select [some time directly here](https://calendly.com/nataliekwong/airbyte-onboarding). - ## Features * **Built for extensibility**: Adapt an existing connector to your needs or build a new one with ease. 
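A note on the version-bump patch above (PATCH 50/83): the `parse` pattern in `.bumpversion.cfg` is what lets the bump tooling split `0.30.31-alpha` into its numeric parts while leaving the `-alpha` suffix alone. A quick, self-contained sketch of that decomposition, assuming bumpversion's conventional `major`/`minor`/`patch` group names:

```python
# Minimal sketch of how the .bumpversion.cfg parse pattern decomposes a version
# string such as "0.30.31-alpha". The named groups (major/minor/patch) are an
# assumption here, matching bumpversion's conventional configuration.
import re

VERSION_PATTERN = re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-[a-z]+)?")

match = VERSION_PATTERN.match("0.30.31-alpha")
assert match is not None
print(match.group("major"), match.group("minor"), match.group("patch"))  # prints: 0 30 31
```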
From 4a99ac3489a75c0822e940b1ec1b152a1d5003f8 Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Fri, 5 Nov 2021 16:16:12 +0700 Subject: [PATCH 52/83] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Confluence?= =?UTF-8?q?=20(#7241)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * initial commit * add remaining streams * fix warning and unit test * clean up * revert unrelated files * remove state files * lint * add documentation strings --- .../source-confluence/.dockerignore | 7 + .../connectors/source-confluence/Dockerfile | 38 ++++ .../connectors/source-confluence/README.md | 132 ++++++++++++ .../acceptance-test-config.yml | 20 ++ .../acceptance-test-docker.sh | 16 ++ .../connectors/source-confluence/build.gradle | 14 ++ .../integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 16 ++ .../integration_tests/configured_catalog.json | 54 +++++ .../integration_tests/invalid_config.json | 5 + .../integration_tests/sample_config.json | 5 + .../connectors/source-confluence/main.py | 13 ++ .../source-confluence/requirements.txt | 2 + .../connectors/source-confluence/setup.py | 30 +++ .../source_confluence/__init__.py | 8 + .../source_confluence/schemas/audit.json | 17 ++ .../source_confluence/schemas/blog_posts.json | 199 ++++++++++++++++++ .../source_confluence/schemas/group.json | 10 + .../source_confluence/schemas/pages.json | 199 ++++++++++++++++++ .../source_confluence/schemas/space.json | 16 ++ .../source_confluence/source.py | 141 +++++++++++++ .../source_confluence/spec.json | 27 +++ .../source-confluence/unit_tests/__init__.py | 3 + .../source-confluence/unit_tests/conftest.py | 10 + .../unit_tests/test_source.py | 32 +++ 25 files changed, 1017 insertions(+) create mode 100644 airbyte-integrations/connectors/source-confluence/.dockerignore create mode 100644 airbyte-integrations/connectors/source-confluence/Dockerfile create mode 100644 airbyte-integrations/connectors/source-confluence/README.md create mode 100644 airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-confluence/build.gradle create mode 100644 airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-confluence/main.py create mode 100644 airbyte-integrations/connectors/source-confluence/requirements.txt create mode 100644 airbyte-integrations/connectors/source-confluence/setup.py create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json 
create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/source.py create mode 100644 airbyte-integrations/connectors/source-confluence/source_confluence/spec.json create mode 100644 airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py create mode 100644 airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py diff --git a/airbyte-integrations/connectors/source-confluence/.dockerignore b/airbyte-integrations/connectors/source-confluence/.dockerignore new file mode 100644 index 0000000000000..b98d666d0fe7a --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_confluence +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-confluence/Dockerfile b/airbyte-integrations/connectors/source-confluence/Dockerfile new file mode 100644 index 0000000000000..c5a090512f941 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_confluence ./source_confluence + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-confluence diff --git a/airbyte-integrations/connectors/source-confluence/README.md b/airbyte-integrations/connectors/source-confluence/README.md new file mode 100644 index 0000000000000..2710ecd57c58d --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/README.md @@ -0,0 +1,132 @@ +# Confluence Source + +This is the repository for the Confluence source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/confluence). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. 
To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/confluence) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_confluence/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source confluence test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-confluence:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-confluence:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-confluence:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-confluence:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-confluence:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. 
+First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-confluence:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
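One practical note on the `check` command shown above: under the hood the connector validates credentials with HTTP Basic auth built from the login email and API token and probes the `/wiki/rest/api/space` endpoint (see `source_confluence/source.py` later in this patch). A minimal standalone sketch of that request, using placeholder domain and credentials:

```python
# Sketch of the request the Confluence source issues for its connection check:
# Basic auth from "<email>:<api_token>" (base64-encoded) against the space endpoint.
# The domain and credentials below are placeholders, not real values.
from base64 import b64encode

import requests

config = {
    "email": "test@example.com",
    "api_token": "abcd",
    "domain_name": "example.atlassian.net",
}

auth_string = f"{config['email']}:{config['api_token']}".encode("utf8")
headers = {"Authorization": f"Basic {b64encode(auth_string).decode('utf8')}"}

response = requests.get(f"https://{config['domain_name']}/wiki/rest/api/space", headers=headers)
response.raise_for_status()  # a non-2xx status means the credentials or domain are wrong
print(len(response.json().get("results", [])), "spaces visible to this token")
```

A 2xx response is what the connector's `check_connection` treats as success; any `requests.exceptions.RequestException` is surfaced as a failed check.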
diff --git a/airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml b/airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml new file mode 100644 index 0000000000000..48739b8cf72ee --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-confluence:dev +tests: + spec: + - spec_path: "source_confluence/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-confluence/build.gradle b/airbyte-integrations/connectors/source-confluence/build.gradle new file mode 100644 index 0000000000000..7638a29f031b6 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_confluence' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py b/airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py new file mode 100644 index 0000000000000..58c194c5d1376 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..8d03fca880e43 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json @@ -0,0 +1,54 @@ +{ + "streams": [ + { + "stream": { + "name": "pages", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "blog_posts", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "space", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "group", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "audit", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..7ca7ac5f19ac5 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/invalid_config.json @@ -0,0 +1,5 @@ +{ + "api_token": "", + "domain_name": "example.atlassian.net", + "email": "test@example.com" +} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json new file mode 100644 index 0000000000000..bc9993815948c --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/sample_config.json @@ -0,0 +1,5 @@ +{ + "api_token": "abcd", + "domain_name": "example.atlassian.net", + "email": "test@example.com" +} diff --git a/airbyte-integrations/connectors/source-confluence/main.py b/airbyte-integrations/connectors/source-confluence/main.py new file mode 100644 index 0000000000000..11b575ce0c4a8 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_confluence import SourceConfluence + +if __name__ == "__main__": + source = SourceConfluence() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-confluence/requirements.txt b/airbyte-integrations/connectors/source-confluence/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-confluence/setup.py b/airbyte-integrations/connectors/source-confluence/setup.py new file mode 100644 index 0000000000000..c5e5646e909fd --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/setup.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", + "responses~=0.13.3", +] + +setup( + name="source_confluence", + description="Source implementation for Confluence.", + author="Tuan Nguyen", + author_email="anhtuan.nguyen@me.com", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py b/airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py new file mode 100644 index 0000000000000..b73058e73def3 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceConfluence + +__all__ = ["SourceConfluence"] diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json new file mode 100644 index 0000000000000..ee1d7cca3f43a --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/audit.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "author": { "type": ["null", "object"] }, + "remoteAddress": { "type": ["null", "string"] }, + "creationDate": { "type": ["null", "string"] }, + "summary": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "category": { "type": ["null", "string"] }, + "sysAdmin": { "type": ["null", "boolean"] }, + "superAdmin": { "type": ["null", "boolean"] }, + "affectedObject": { "type": ["null", "object"] }, + "changedValues": { "type": ["null", "array"] }, + "associatedObjects": { "type": ["null", "array"] } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json new file mode 100644 index 0000000000000..023d04af6716e --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/blog_posts.json @@ -0,0 +1,199 @@ +{ + "definitions": { + "user": { + "type": "object", + "properties": { + "type": { + "type": ["string", "null"] + }, + "accountId": { + "type": ["string", "null"] + }, + "email": { + "type": ["string", "null"] + }, + "publicName": { + "type": ["string", "null"] + } + } + }, + "content": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + "contentRestriction": { + "type": "object", + "properties": { + "operations": { + "type": ["string", "null"] + }, + "restrictions": { + "user": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + } + } + } + } + } + }, + "usersUserKeys": { + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + }, + "userKeys": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "version": { + "type": "object", + "properties": { + "by": { + "$ref": "#/definitions/user" + }, + "when": { + "type": ["string", "null"], + "format": "date-time" + }, + "friendlyWhen": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "number": { + "type": ["integer", "null"] + }, + "minorEdit": { + "type": ["boolean", "null"] + }, + "collaborators": { + "$ref": "#/definitions/usersUserKeys" + } + } + } + }, + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + }, + "history": { + "type": "object", + "properties": { + "latest": { + "type": "boolean" + }, + "createdBy": { + "$ref": "#/definitions/user" + }, + "createdDate": { + "type": "string", + "format": "date-time" + }, + "contributors": { + "type": "object", + "properties": { + "publishers": { + "$ref": "#/definitions/usersUserKeys" + } + } + }, + "previousVersion": { + "$ref": "#/definitions/version" + } + } + }, + "version": { + "$ref": 
"#/definitions/version" + }, + "descendants": { + "type": "object", + "properties": { + "comment": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/content" + } + } + } + } + } + }, + "restrictions": { + "type": "object", + "properties": { + "read": { + "$ref": "#/definitions/contentRestriction" + } + } + }, + "_expandable": { + "type": "object", + "properties": { + "container": { + "type": "string" + }, + "space": { + "type": "string" + } + } + }, + "_links": { + "type": "object", + "properties": { + "self": { + "type": "string" + }, + "tinyui": { + "type": "string" + }, + "editui": { + "type": "string" + }, + "webui": { + "type": "string" + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json new file mode 100644 index 0000000000000..e430f4dfaa039 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/group.json @@ -0,0 +1,10 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "string"] }, + "name": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] }, + "_links": { "type": ["null", "object"] } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json new file mode 100644 index 0000000000000..023d04af6716e --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/pages.json @@ -0,0 +1,199 @@ +{ + "definitions": { + "user": { + "type": "object", + "properties": { + "type": { + "type": ["string", "null"] + }, + "accountId": { + "type": ["string", "null"] + }, + "email": { + "type": ["string", "null"] + }, + "publicName": { + "type": ["string", "null"] + } + } + }, + "content": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + "contentRestriction": { + "type": "object", + "properties": { + "operations": { + "type": ["string", "null"] + }, + "restrictions": { + "user": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + } + } + } + } + } + }, + "usersUserKeys": { + "type": "object", + "properties": { + "users": { + "type": "array", + "items": { + "$ref": "#/definitions/user" + } + }, + "userKeys": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "version": { + "type": "object", + "properties": { + "by": { + "$ref": "#/definitions/user" + }, + "when": { + "type": ["string", "null"], + "format": "date-time" + }, + "friendlyWhen": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "number": { + "type": ["integer", "null"] + }, + "minorEdit": { + "type": ["boolean", "null"] + }, + "collaborators": { + "$ref": "#/definitions/usersUserKeys" + } + } + } + }, + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "title": { + "type": "string" + }, + "type": { + "type": "string" + }, + "status": { + "type": "string" + }, + "history": { + "type": "object", + "properties": { + "latest": { + "type": "boolean" + }, + "createdBy": { + "$ref": "#/definitions/user" + }, + "createdDate": { + "type": 
"string", + "format": "date-time" + }, + "contributors": { + "type": "object", + "properties": { + "publishers": { + "$ref": "#/definitions/usersUserKeys" + } + } + }, + "previousVersion": { + "$ref": "#/definitions/version" + } + } + }, + "version": { + "$ref": "#/definitions/version" + }, + "descendants": { + "type": "object", + "properties": { + "comment": { + "type": "object", + "properties": { + "results": { + "type": "array", + "items": { + "$ref": "#/definitions/content" + } + } + } + } + } + }, + "restrictions": { + "type": "object", + "properties": { + "read": { + "$ref": "#/definitions/contentRestriction" + } + } + }, + "_expandable": { + "type": "object", + "properties": { + "container": { + "type": "string" + }, + "space": { + "type": "string" + } + } + }, + "_links": { + "type": "object", + "properties": { + "self": { + "type": "string" + }, + "tinyui": { + "type": "string" + }, + "editui": { + "type": "string" + }, + "webui": { + "type": "string" + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json new file mode 100644 index 0000000000000..a021d235c3ab5 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/schemas/space.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "integer"] }, + "key": { "type": ["null", "string"] }, + "name": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] }, + "status": { "type": ["null", "string"] }, + "permissions": { "type": ["null", "array"] }, + "icon": { "type": ["null", "object"] }, + "description": { "type": ["null", "object"] }, + "_expandable": { "type": ["null", "object"] }, + "_links": { "type": ["null", "object"] } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/source.py b/airbyte-integrations/connectors/source-confluence/source_confluence/source.py new file mode 100644 index 0000000000000..278dc1a84cab7 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/source.py @@ -0,0 +1,141 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from abc import ABC +from base64 import b64encode +from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer + + +# Basic full refresh stream +class ConfluenceStream(HttpStream, ABC): + url_base = "https://{}/wiki/rest/api/" + primary_key = "id" + limit = 50 + start = 0 + expand = [] + transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) + + def __init__(self, config: Dict): + super().__init__(authenticator=config["authenticator"]) + self.config = config + self.url_base = self.url_base.format(config["domain_name"]) + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json() + links = json_response.get("_links") + next_link = links.get("next") + if next_link: + self.start += self.limit + return {"start": self.start} + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = {"limit": self.limit, "expand": ",".join(self.expand)} + if next_page_token: + params.update({"start": next_page_token["start"]}) + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json() + records = json_response.get("results", []) + yield from records + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return self.api_name + + +class BaseContentStream(ConfluenceStream, ABC): + api_name = "content" + expand = [ + "history", + "history.lastUpdated", + "history.previousVersion", + "history.contributors", + "restrictions.read.restrictions.user", + "version", + "descendants.comment", + ] + limit = 25 + content_type = None + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params.update({"type": self.content_type}) + return params + + +class Pages(BaseContentStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-content/#api-wiki-rest-api-content-get + """ + content_type = "page" + + +class BlogPosts(BaseContentStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-content/#api-wiki-rest-api-content-get + """ + content_type = "blogpost" + + +class Space(ConfluenceStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-space/#api-wiki-rest-api-space-get + """ + api_name = "space" + expand = ["permissions", "icon", "description.plain", "description.view"] + + +class Group(ConfluenceStream): + """ + API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-group/#api-wiki-rest-api-group-get + """ + api_name = "group" + + +class Audit(ConfluenceStream): + """ + API documentation: 
https://developer.atlassian.com/cloud/confluence/rest/api-group-audit/#api-wiki-rest-api-audit-get + """ + primary_key = "author" + api_name = "audit" + limit = 1000 + + +# Source +class HttpBasicAuthenticator(TokenAuthenticator): + def __init__(self, email: str, token: str, auth_method: str = "Basic", **kwargs): + auth_string = f"{email}:{token}".encode("utf8") + b64_encoded = b64encode(auth_string).decode("utf8") + super().__init__(token=b64_encoded, auth_method=auth_method, **kwargs) + + +class SourceConfluence(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + auth = HttpBasicAuthenticator(config["email"], config["api_token"], auth_method="Basic").get_auth_header() + url = f"https://{config['domain_name']}/wiki/rest/api/space" + try: + response = requests.get(url, headers=auth) + response.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = HttpBasicAuthenticator(config["email"], config["api_token"], auth_method="Basic") + config["authenticator"] = auth + return [Pages(config), BlogPosts(config), Space(config), Group(config), Audit(config)] diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/spec.json b/airbyte-integrations/connectors/source-confluence/source_confluence/spec.json new file mode 100644 index 0000000000000..136bd0bead4db --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/spec.json @@ -0,0 +1,27 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Confluence Spec", + "type": "object", + "required": ["api_token", "domain_name", "email"], + "additionalProperties": false, + "properties": { + "api_token": { + "type": "string", + "description": "Please follow the Jira confluence for generating an API token: https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/", + "airbyte_secret": true + }, + "domain_name": { + "type": "string", + "description": "Your Confluence domain name", + "examples": ["example.atlassian.net"] + }, + "email": { + "type": "string", + "description": "Your Confluence login email", + "examples": ["abc@example.com"] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py b/airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py b/airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py new file mode 100644 index 0000000000000..29078266ccf9a --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/unit_tests/conftest.py @@ -0,0 +1,10 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import pytest + + +@pytest.fixture(scope="session", name="config") +def config_fixture(): + return {"api_token": "test_api_key", "domain_name": "example.atlassian.net", "email": "test@example.com"} diff --git a/airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py b/airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py new file mode 100644 index 0000000000000..e03a348803683 --- /dev/null +++ b/airbyte-integrations/connectors/source-confluence/unit_tests/test_source.py @@ -0,0 +1,32 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +import responses +from source_confluence.source import SourceConfluence + + +def setup_responses(): + responses.add( + responses.GET, + "https://example.atlassian.net/wiki/rest/api/space", + json={"access_token": "test_api_key", "expires_in": 3600}, + ) + + +@responses.activate +def test_check_connection(config): + setup_responses() + source = SourceConfluence() + logger_mock = MagicMock() + assert source.check_connection(logger_mock, config) == (True, None) + + +def test_streams_count(mocker): + source = SourceConfluence() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 5 + assert len(streams) == expected_streams_number From af1c62f0c609de899655a8d6cb5b0a3f0c610a65 Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Fri, 5 Nov 2021 15:17:26 +0530 Subject: [PATCH 53/83] Publish new source Confluence #7241 (#7666) * initial commit * add remaining streams * fix warning and unit test * clean up * revert unrelated files * remove state files * lint * add documentation strings * added confluence credentials to ci * fix: removed audit from configured catalog as it cannot be pulled in free account * fix: format Co-authored-by: Tuan Nguyen --- .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + .../integration_tests/configured_catalog.json | 10 ---------- .../source-confluence/source_confluence/source.py | 5 +++++ tools/bin/ci_credentials.sh | 1 + 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 56ade97c3d252..9d45d2dc2c2e0 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -191,6 +191,7 @@ jobs: MONGODB_TEST_CREDS: ${{ secrets.MONGODB_TEST_CREDS }} SOURCE_ONESIGNAL_TEST_CREDS: ${{ secrets.SOURCE_ONESIGNAL_TEST_CREDS }} SOURCE_SALESLOFT_TEST_CREDS: ${{ secrets.SOURCE_SALESLOFT_TEST_CREDS }} + SOURCE_CONFLUENCE_TEST_CREDS: ${{ secrets.SOURCE_CONFLUENCE_TEST_CREDS }} SOURCE_AMAZON_SQS_TEST_CREDS: ${{ secrets.SOURCE_AMAZON_SQS_TEST_CREDS }} SOURCE_FRESHSERVICE_TEST_CREDS: ${{ secrets.SOURCE_FRESHSERVICE_TEST_CREDS }} SOURCE_LEMLIST_TEST_CREDS: ${{ secrets.SOURCE_LEMLIST_TEST_CREDS }} diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 94e50a3f3ae41..cf685295ee0af 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -186,6 +186,7 @@ jobs: MONGODB_TEST_CREDS: ${{ secrets.MONGODB_TEST_CREDS }} SOURCE_ONESIGNAL_TEST_CREDS: ${{ secrets.SOURCE_ONESIGNAL_TEST_CREDS }} SOURCE_SALESLOFT_TEST_CREDS: ${{ secrets.SOURCE_SALESLOFT_TEST_CREDS }} + SOURCE_CONFLUENCE_TEST_CREDS: ${{ secrets.SOURCE_CONFLUENCE_TEST_CREDS }} SOURCE_AMAZON_SQS_TEST_CREDS: ${{ secrets.SOURCE_AMAZON_SQS_TEST_CREDS }} SOURCE_FRESHSERVICE_TEST_CREDS: ${{ secrets.SOURCE_FRESHSERVICE_TEST_CREDS }} 
SOURCE_LEMLIST_TEST_CREDS: ${{ secrets.SOURCE_LEMLIST_TEST_CREDS }} diff --git a/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json index 8d03fca880e43..3b9510160172f 100644 --- a/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-confluence/integration_tests/configured_catalog.json @@ -39,16 +39,6 @@ }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "audit", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_primary_key": [["id"]] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" } ] } diff --git a/airbyte-integrations/connectors/source-confluence/source_confluence/source.py b/airbyte-integrations/connectors/source-confluence/source_confluence/source.py index 278dc1a84cab7..cb2510c20f15b 100644 --- a/airbyte-integrations/connectors/source-confluence/source_confluence/source.py +++ b/airbyte-integrations/connectors/source-confluence/source_confluence/source.py @@ -82,6 +82,7 @@ class Pages(BaseContentStream): """ API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-content/#api-wiki-rest-api-content-get """ + content_type = "page" @@ -89,6 +90,7 @@ class BlogPosts(BaseContentStream): """ API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-content/#api-wiki-rest-api-content-get """ + content_type = "blogpost" @@ -96,6 +98,7 @@ class Space(ConfluenceStream): """ API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-space/#api-wiki-rest-api-space-get """ + api_name = "space" expand = ["permissions", "icon", "description.plain", "description.view"] @@ -104,6 +107,7 @@ class Group(ConfluenceStream): """ API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-group/#api-wiki-rest-api-group-get """ + api_name = "group" @@ -111,6 +115,7 @@ class Audit(ConfluenceStream): """ API documentation: https://developer.atlassian.com/cloud/confluence/rest/api-group-audit/#api-wiki-rest-api-audit-get """ + primary_key = "author" api_name = "audit" limit = 1000 diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 03e35719baae4..bb5e4da501503 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -64,6 +64,7 @@ write_standard_creds source-braintree "$BRAINTREE_TEST_CREDS" write_standard_creds source-cart "$CART_TEST_CREDS" write_standard_creds source-chargebee "$CHARGEBEE_INTEGRATION_TEST_CREDS" write_standard_creds source-close-com "$SOURCE_CLOSE_COM_CREDS" +write_standard_creds source-confluence "$SOURCE_CONFLUENCE_TEST_CREDS" write_standard_creds source-delighted "$SOURCE_DELIGHTED_TEST_CREDS" write_standard_creds source-drift "$DRIFT_INTEGRATION_TEST_CREDS" write_standard_creds source-dixa "$SOURCE_DIXA_TEST_CREDS" From 56db8065e9fbfbf054303c9d1a8c9694547dd02a Mon Sep 17 00:00:00 2001 From: Yurii Bidiuk <35812734+yurii-bidiuk@users.noreply.github.com> Date: Fri, 5 Nov 2021 12:21:07 +0200 Subject: [PATCH 54/83] =?UTF-8?q?=F0=9F=90=9B=20Source=20MSSQL:=20fix=20da?= =?UTF-8?q?ta=20type=20(smalldatetime,=20smallmoney)=20conversion=20from?= =?UTF-8?q?=20mssql=20source=20(#5609)=20(#7386)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix data type (smalldatetime, smallmoney) 
conversion from mssql source (#5609) * Fixed code format * Bumb new version * Update documentation (mssql.md) * formating * fixed converter properties * aligned converter utils with #7339 Co-authored-by: Andrii Leonets <30464745+DoNotPanicUA@users.noreply.github.com> --- .../b5ea17b1-f170-46dc-bc31-cc744ca984c1.json | 2 +- .../debezium/internals/MSSQLConverter.java | 65 +++++++++++++++++++ .../source-mssql-strict-encrypt/Dockerfile | 2 +- .../connectors/source-mssql/Dockerfile | 2 +- .../source/mssql/MssqlCdcProperties.java | 3 + docs/integrations/sources/mssql.md | 1 + 6 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json index e65a9e63751d9..ff762f8c9f705 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b5ea17b1-f170-46dc-bc31-cc744ca984c1.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b5ea17b1-f170-46dc-bc31-cc744ca984c1", "name": "Microsoft SQL Server (MSSQL)", "dockerRepository": "airbyte/source-mssql", - "dockerImageTag": "0.3.6", + "dockerImageTag": "0.3.8", "documentationUrl": "https://docs.airbyte.io/integrations/sources/mssql", "icon": "mssql.svg" } diff --git a/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java new file mode 100644 index 0000000000000..e162262ba9fb5 --- /dev/null +++ b/airbyte-integrations/bases/debezium/src/main/java/io/airbyte/integrations/debezium/internals/MSSQLConverter.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.debezium.internals; + +import io.debezium.spi.converter.CustomConverter; +import io.debezium.spi.converter.RelationalColumn; +import java.math.BigDecimal; +import java.util.Objects; +import java.util.Properties; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MSSQLConverter implements CustomConverter { + + private final Logger LOGGER = LoggerFactory.getLogger(MSSQLConverter.class);; + + private final String SMALLDATETIME_TYPE = "SMALLDATETIME"; + private final String SMALLMONEY_TYPE = "SMALLMONEY"; + + @Override + public void configure(Properties props) {} + + @Override + public void converterFor(final RelationalColumn field, + final ConverterRegistration registration) { + if (SMALLDATETIME_TYPE.equalsIgnoreCase(field.typeName())) { + registerDate(field, registration); + } else if (SMALLMONEY_TYPE.equalsIgnoreCase(field.typeName())) { + registerMoney(field, registration); + } + + } + + private void registerDate(final RelationalColumn field, + final ConverterRegistration registration) { + registration.register(SchemaBuilder.string(), input -> { + if (Objects.isNull(input)) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + return DebeziumConverterUtils.convertDate(input); + }); + } + + private void registerMoney(final RelationalColumn field, + final ConverterRegistration registration) { + registration.register(SchemaBuilder.float64(), input -> { + if (Objects.isNull(input)) { + return DebeziumConverterUtils.convertDefaultValue(field); + } + + if (input instanceof BigDecimal) { + return ((BigDecimal) input).doubleValue(); + } + + LOGGER.warn("Uncovered money class type '{}'. Use default converter", + input.getClass().getName()); + return input.toString(); + }); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile index d25e01d0ad9f6..adb13884e8aa4 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-mssql-strict-encrypt diff --git a/airbyte-integrations/connectors/source-mssql/Dockerfile b/airbyte-integrations/connectors/source-mssql/Dockerfile index 0683c70931345..b73c16413c579 100644 --- a/airbyte-integrations/connectors/source-mssql/Dockerfile +++ b/airbyte-integrations/connectors/source-mssql/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.3.6 +LABEL io.airbyte.version=0.3.8 LABEL io.airbyte.name=airbyte/source-mssql diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java index ce1476668304f..50dd8429577b8 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcProperties.java @@ -26,6 +26,9 @@ static 
Properties getDebeziumProperties() { // https://debezium.io/documentation/reference/1.4/connectors/sqlserver.html#sqlserver-property-provide-transaction-metadata props.setProperty("provide.transaction.metadata", "false"); + props.setProperty("converters", "mssql_converter"); + props.setProperty("mssql_converter.type", "io.airbyte.integrations.debezium.internals.MSSQLConverter"); + return props; } diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index d15213490f0e1..35e3621541df3 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -294,6 +294,7 @@ If you do not see a type in this list, assume that it is coerced into a string. | Version | Date | Pull Request | Subject | | | :--- | :--- | :--- | :--- | :--- | +| 0.3.8 | 2021-10-26 | [7386](https://github.com/airbytehq/airbyte/pull/7386) | Fixed data type (smalldatetime, smallmoney) conversion from mssql source | | | 0.3.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | | | 0.3.6 | 2021-09-17 | [6318](https://github.com/airbytehq/airbyte/pull/6318) | Added option to connect to DB via SSH | | | 0.3.4 | 2021-08-13 | [4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | | From 7fd180446f4ff6d711082dc231ac9b8089c74004 Mon Sep 17 00:00:00 2001 From: Artem Astapenko <3767150+Jamakase@users.noreply.github.com> Date: Fri, 5 Nov 2021 13:52:04 +0300 Subject: [PATCH 55/83] Fix numerous create calls (#7659) --- .../src/packages/cloud/services/auth/AuthService.tsx | 11 +++-------- .../packages/cloud/views/auth/LoginPage/LoginPage.tsx | 4 ++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx b/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx index 7ec8173eee198..110b1243989c9 100644 --- a/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx +++ b/airbyte-webapp/src/packages/cloud/services/auth/AuthService.tsx @@ -29,7 +29,7 @@ export type AuthConfirmPasswordReset = ( export type AuthLogin = (values: { email: string; password: string; -}) => Promise; +}) => Promise; export type AuthSignUp = (form: { email: string; @@ -77,7 +77,7 @@ export const AuthenticationProvider: React.FC = ({ children }) => { const authService = useMemo(() => new GoogleAuthService(() => auth), [auth]); useEffect(() => { - auth.onAuthStateChanged(async (currentUser) => { + return auth.onAuthStateChanged(async (currentUser) => { if (state.currentUser === null && currentUser) { // token = await currentUser.getIdToken(); @@ -115,13 +115,8 @@ export const AuthenticationProvider: React.FC = ({ children }) => { inited: state.inited, isLoading: state.loading, emailVerified: state.emailVerified, - async login(values: { - email: string; - password: string; - }): Promise { + async login(values: { email: string; password: string }): Promise { await authService.login(values.email, values.password); - - return null; }, async logout(): Promise { await authService.signOut(); diff --git a/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx b/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx index badbdf965ad19..98045be799280 100644 --- a/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx +++ b/airbyte-webapp/src/packages/cloud/views/auth/LoginPage/LoginPage.tsx @@ -36,12 +36,12 @@ const LoginPage: React.FC = () => { password: "", }} 
validationSchema={LoginPageValidationSchema} - onSubmit={async (values, { setFieldError, setStatus }) => + onSubmit={async (values, { setFieldError }) => login(values).catch((err) => { if (err instanceof FieldError) { setFieldError(err.field, err.message); } else { - setStatus(err.message); + setFieldError("password", err.message); } }) } From c6edf1358881fe80082503e98b8882ebb13b8cbf Mon Sep 17 00:00:00 2001 From: Marcos Eliziario Santos Date: Fri, 5 Nov 2021 08:59:39 -0300 Subject: [PATCH 56/83] Eliziario/hubspot oauth (#7279) * Hubspot OAuth backend implementation * Hubspot OAuth backend implementation - post master merge fixes * Hubspot OAuth backend implementation - missing factory * Review changes - return only refresh_token when consent flow callback returns both access_token and refresh_token * Missing import for OAuthImplementationFactory * unit test fix after merge for HubspotOAuthFlowTest * unit test fix after merge for HubspotOAuthFlowTest --- .../java/io/airbyte/oauth/BaseOAuthFlow.java | 27 +----- .../oauth/OAuthImplementationFactory.java | 7 +- .../airbyte/oauth/flows/HubspotOAuthFlow.java | 93 +++++++++++++++++++ .../FacebookOAuthFlowIntegrationTest.java | 2 +- .../GithubOAuthFlowIntegrationTest.java | 2 +- .../SurveymonkeyOAuthFlowIntegrationTest.java | 2 +- .../HubspotOAuthFlowIntegrationTest.java | 74 +++++++++++++++ .../oauth/flows/OAuthFlowIntegrationTest.java | 6 +- .../oauth/flows/HubspotOAuthFlowTest.java | 79 ++++++++++++++++ 9 files changed, 256 insertions(+), 36 deletions(-) create mode 100644 airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java create mode 100644 airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java create mode 100644 airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index dd80f158e110a..ebf3346b2c0e5 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -14,7 +14,6 @@ import java.io.IOException; import java.lang.reflect.Type; import java.net.URI; -import java.net.URISyntaxException; import java.net.URLEncoder; import java.net.http.HttpClient; import java.net.http.HttpClient.Version; @@ -100,31 +99,6 @@ public String getDestinationConsentUrl(final UUID workspaceId, final UUID destin return formatConsentUrl(destinationDefinitionId, getClientIdUnsafe(oAuthParamConfig), redirectUrl); } - protected String formatConsentUrl(String clientId, - String redirectUrl, - String host, - String path, - String scope, - String responseType) - throws IOException { - final URIBuilder builder = new URIBuilder() - .setScheme("https") - .setHost(host) - .setPath(path) - // required - .addParameter("client_id", clientId) - .addParameter("redirect_uri", redirectUrl) - .addParameter("state", getState()) - // optional - .addParameter("response_type", responseType) - .addParameter("scope", scope); - try { - return builder.build().toString(); - } catch (URISyntaxException e) { - throw new IOException("Failed to format Consent URL for OAuth flow", e); - } - } - /** * Depending on the OAuth flow implementation, the URL to grant user's consent may differ, * especially in the query parameters to be provided. 
This function should generate such consent URL @@ -235,6 +209,7 @@ protected Map extractRefreshToken(final JsonNode data, String ac } else { LOGGER.info("Oauth flow failed. Data received from server: {}", data); throw new IOException(String.format("Missing 'refresh_token' in query params from %s. Response: %s", accessTokenUrl)); + } return Map.of("credentials", result); diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index f9f450c286d53..289390429be96 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -6,11 +6,7 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.config.persistence.ConfigRepository; -import io.airbyte.oauth.flows.AsanaOAuthFlow; -import io.airbyte.oauth.flows.GithubOAuthFlow; -import io.airbyte.oauth.flows.SalesforceOAuthFlow; -import io.airbyte.oauth.flows.SurveymonkeyOAuthFlow; -import io.airbyte.oauth.flows.TrelloOAuthFlow; +import io.airbyte.oauth.flows.*; import io.airbyte.oauth.flows.facebook.FacebookMarketingOAuthFlow; import io.airbyte.oauth.flows.facebook.FacebookPagesOAuthFlow; import io.airbyte.oauth.flows.facebook.InstagramOAuthFlow; @@ -39,6 +35,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository) { .put("airbyte/source-salesforce", new SalesforceOAuthFlow(configRepository)) .put("airbyte/source-surveymonkey", new SurveymonkeyOAuthFlow(configRepository)) .put("airbyte/source-trello", new TrelloOAuthFlow(configRepository)) + .put("airbyte/source-hubspot", new HubspotOAuthFlow(configRepository)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java new file mode 100644 index 0000000000000..7e7e81d5e239e --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuthFlow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +public class HubspotOAuthFlow extends BaseOAuthFlow { + + private final String AUTHORIZE_URL = "https://app.hubspot.com/oauth/authorize"; + + public HubspotOAuthFlow(ConfigRepository configRepository) { + super(configRepository); + } + + public HubspotOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier, TOKEN_REQUEST_CONTENT_TYPE.JSON); + } + + /** + * Depending on the OAuth flow implementation, the URL to grant user's consent may differ, + * especially in the query parameters to be provided. This function should generate such consent URL + * accordingly. 
+ * + * @param definitionId The configured definition ID of this client + * @param clientId The configured client ID + * @param redirectUrl the redirect URL + */ + @Override + protected String formatConsentUrl(UUID definitionId, String clientId, String redirectUrl) throws IOException { + try { + return new URIBuilder(AUTHORIZE_URL) + .addParameter("client_id", clientId) + .addParameter("redirect_uri", redirectUrl) + .addParameter("state", getState()) + .addParameter("scopes", getScopes()) + .build().toString(); + } catch (URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, String clientSecret, String authCode, String redirectUrl) { + return ImmutableMap.builder() + // required + .put("client_id", clientId) + .put("redirect_uri", redirectUrl) + .put("client_secret", clientSecret) + .put("code", authCode) + .put("grant_type", "authorization_code") + .build(); + } + + private String getScopes() { + return String.join(" ", "content", + "crm.schemas.deals.read", + "crm.objects.owners.read", + "forms", + "tickets", + "e-commerce", + "crm.objects.companies.read", + "crm.lists.read", + "crm.objects.deals.read", + "crm.schemas.contacts.read", + "crm.objects.contacts.read", + "crm.schemas.companies.read", + "files", + "forms-uploaded-files", + "files.ui_hidden.read"); + } + + /** + * Returns the URL where to retrieve the access token from. + * + * @param oAuthParamConfig the configuration map + */ + @Override + protected String getAccessTokenUrl() { + return "https://api.hubapi.com/oauth/v1/token"; + } + +} diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java index 88884ff611c59..53ccd82061154 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java @@ -31,7 +31,7 @@ public class FacebookOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { protected static final String REDIRECT_URL = "http://localhost:9000/auth_flow"; @Override - protected Path get_credentials_path() { + protected Path getCredentialsPath() { return CREDENTIALS_PATH; } diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java index 7d569291c3045..797af710644f2 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java @@ -31,7 +31,7 @@ public class GithubOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { protected static final int SERVER_LISTENING_PORT = 8000; @Override - protected Path get_credentials_path() { + protected Path getCredentialsPath() { return CREDENTIALS_PATH; } diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java index e5263ebbe1082..60961ec15936c 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java +++ 
b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java @@ -30,7 +30,7 @@ public class SurveymonkeyOAuthFlowIntegrationTest extends OAuthFlowIntegrationTe protected static final String REDIRECT_URL = "http://localhost:3000/auth_flow"; @Override - protected Path get_credentials_path() { + protected Path getCredentialsPath() { return CREDENTIALS_PATH; } diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java new file mode 100644 index 0000000000000..bbe96e57956a2 --- /dev/null +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.SourceOAuthParameter; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.OAuthFlowImplementation; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.Test; + +public class HubspotOAuthFlowIntegrationTest extends OAuthFlowIntegrationTest { + + @Override + protected Path getCredentialsPath() { + return Path.of("secrets/hubspot.json"); + } + + @Override + protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { + return new HubspotOAuthFlow(configRepository); + } + + @Test + public void testFullOAuthFlow() throws InterruptedException, ConfigNotFoundException, IOException, JsonValidationException { + int limit = 100; + final UUID workspaceId = UUID.randomUUID(); + final UUID definitionId = UUID.randomUUID(); + final String fullConfigAsString = new String(Files.readAllBytes(getCredentialsPath())); + final JsonNode credentialsJson = Jsons.deserialize(fullConfigAsString); + when(configRepository.listSourceOAuthParam()).thenReturn(List.of(new SourceOAuthParameter() + .withOauthParameterId(UUID.randomUUID()) + .withSourceDefinitionId(definitionId) + .withWorkspaceId(workspaceId) + .withConfiguration(Jsons.jsonNode(ImmutableMap.builder() + .put("client_id", credentialsJson.get("credentials").get("client_id").asText()) + .put("client_secret", credentialsJson.get("credentials").get("client_secret").asText()) + .build())))); + var flowObject = getFlowObject(configRepository); + final String url = flowObject.getSourceConsentUrl(workspaceId, definitionId, REDIRECT_URL); + LOGGER.info("Waiting for user consent at: {}", url); + // TODO: To automate, start a selenium job to navigate to the Consent URL and click on allowing + // access... 
+ while (!serverHandler.isSucceeded() && limit > 0) { + Thread.sleep(1000); + limit -= 1; + } + assertTrue(serverHandler.isSucceeded(), "Failed to get User consent on time"); + final Map params = flowObject.completeSourceOAuth(workspaceId, definitionId, + Map.of("code", serverHandler.getParamValue()), REDIRECT_URL); + LOGGER.info("Response from completing OAuth Flow is: {}", params.toString()); + assertTrue(params.containsKey("credentials")); + final Map credentials = (Map) params.get("credentials"); + assertTrue(credentials.containsKey("refresh_token")); + assertTrue(credentials.get("refresh_token").toString().length() > 0); + assertTrue(credentials.containsKey("access_token")); + assertTrue(credentials.get("access_token").toString().length() > 0); + } + +} diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java index c2d64d6c2e154..d9124f645fd60 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java @@ -38,13 +38,15 @@ public abstract class OAuthFlowIntegrationTest { protected HttpServer server; protected ServerHandler serverHandler; - protected abstract Path get_credentials_path(); + protected Path getCredentialsPath() { + return Path.of("secrets/config.json"); + }; protected abstract OAuthFlowImplementation getFlowObject(ConfigRepository configRepository); @BeforeEach public void setup() throws IOException { - if (!Files.exists(get_credentials_path())) { + if (!Files.exists(getCredentialsPath())) { throw new IllegalStateException( "Must provide path to a oauth credentials file."); } diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java new file mode 100644 index 0000000000000..e18f83864e26b --- /dev/null +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.oauth.flows; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.SourceOAuthParameter; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.net.http.HttpClient; +import java.net.http.HttpResponse; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class HubspotOAuthFlowTest { + + private UUID workspaceId; + private UUID definitionId; + private ConfigRepository configRepository; + private HubspotOAuthFlow flow; + private HttpClient httpClient; + + private static final String REDIRECT_URL = "https://airbyte.io"; + + private static String getConstantState() { + return "state"; + } + + @BeforeEach + public void setup() throws IOException, JsonValidationException { + workspaceId = UUID.randomUUID(); + definitionId = UUID.randomUUID(); + configRepository = mock(ConfigRepository.class); + httpClient = mock(HttpClient.class); + when(configRepository.listSourceOAuthParam()).thenReturn(List.of(new SourceOAuthParameter() + .withOauthParameterId(UUID.randomUUID()) + .withSourceDefinitionId(definitionId) + .withWorkspaceId(workspaceId) + .withConfiguration(Jsons.jsonNode(ImmutableMap.builder() + .put("client_id", "test_client_id") + .put("client_secret", "test_client_secret") + .build())))); + flow = new HubspotOAuthFlow(configRepository, httpClient, HubspotOAuthFlowTest::getConstantState); + + } + + @Test + public void testGetSourceConcentUrl() throws IOException, ConfigNotFoundException { + final String concentUrl = + flow.getSourceConsentUrl(workspaceId, definitionId, REDIRECT_URL); + assertEquals(concentUrl, + "https://app.hubspot.com/oauth/authorize?client_id=test_client_id&redirect_uri=https%3A%2F%2Fairbyte.io&state=state&scopes=content+crm.schemas.deals.read+crm.objects.owners.read+forms+tickets+e-commerce+crm.objects.companies.read+crm.lists.read+crm.objects.deals.read+crm.schemas.contacts.read+crm.objects.contacts.read+crm.schemas.companies.read+files+forms-uploaded-files+files.ui_hidden.read"); + } + + @Test + public void testCompleteSourceOAuth() throws IOException, InterruptedException, ConfigNotFoundException { + final var response = mock(HttpResponse.class); + var returnedCredentials = "{\"refresh_token\":\"refresh_token_response\"}"; + when(response.body()).thenReturn(returnedCredentials); + when(httpClient.send(any(), any())).thenReturn(response); + final Map queryParams = Map.of("code", "test_code"); + final Map actualQueryParams = + flow.completeSourceOAuth(workspaceId, definitionId, queryParams, REDIRECT_URL); + assertEquals(Jsons.serialize(Map.of("credentials", Jsons.deserialize(returnedCredentials))), Jsons.serialize(actualQueryParams)); + } + +} From 23f3d3e597c7d15e6207e07f698f2e938b58cafb Mon Sep 17 00:00:00 2001 From: Vadym Date: Fri, 5 Nov 2021 20:40:22 +0200 Subject: [PATCH 57/83] Source Facebook Marketing: Fix number and integer fields in schemas (#7605) * Add job retry logics to AdsInsights stream. 
* Add ad_creatives.thumbnail_url to ignored_fields in full_refresh SAT * Update wait_for_job condition --- .../acceptance-test-config.yml | 3 +- .../source_facebook_marketing/streams.py | 36 ++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml b/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml index 4ec24438c10d0..d426101a5ef5f 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml @@ -29,4 +29,5 @@ tests: # Because one read response contains this metric, and other doesn't. # Therefore, it's needed to ignore fields like this in API responses. ignored_fields: - "ads_insights_age_and_gender": ["cost_per_estimated_ad_recallers"] + "ads_insights_age_and_gender": ["cost_per_estimated_ad_recallers"] + "ad_creatives": ["thumbnail_url"] diff --git a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py index cb45532f32deb..315e19628ea48 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py +++ b/airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py @@ -290,6 +290,9 @@ class AdsInsights(FBMarketingIncrementalStream): action_attribution_windows = ALL_ACTION_ATTRIBUTION_WINDOWS time_increment = 1 + running_jobs = deque() + times_job_restarted = {} + breakdowns = [] def __init__( @@ -327,7 +330,7 @@ def read_records( stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: """Waits for current job to finish (slice) and yield its result""" - result = self.wait_for_job(stream_slice["job"]) + result = self.wait_for_job(stream_slice["job"], stream_state=stream_state) # because we query `lookback_window` days before actual cursor we might get records older then cursor for obj in result.get_result(): @@ -341,20 +344,19 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite 3. 
we shouldn't proceed to consumption of the next job before previous succeed """ stream_state = stream_state or {} - running_jobs = deque() date_ranges = list(self._date_ranges(stream_state=stream_state)) for params in date_ranges: params = deep_merge(params, self.request_params(stream_state=stream_state)) job = self._create_insights_job(params) - running_jobs.append(job) - if len(running_jobs) >= self.MAX_ASYNC_JOBS: - yield {"job": running_jobs.popleft()} + self.running_jobs.append(job) + if len(self.running_jobs) >= self.MAX_ASYNC_JOBS: + yield {"job": self.running_jobs.popleft()} - while running_jobs: - yield {"job": running_jobs.popleft()} + while self.running_jobs: + yield {"job": self.running_jobs.popleft()} @backoff_policy - def wait_for_job(self, job) -> AdReportRun: + def wait_for_job(self, job, stream_state: Mapping[str, Any] = None) -> AdReportRun: factor = 2 start_time = pendulum.now() sleep_seconds = factor @@ -367,10 +369,20 @@ def wait_for_job(self, job) -> AdReportRun: if job["async_status"] == "Job Completed": return job - elif job["async_status"] == "Job Failed": - raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.") - elif job["async_status"] == "Job Skipped": - raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.") + elif job["async_status"] in ["Job Failed", "Job Skipped"]: + time_range = (job["date_start"], job["date_stop"]) + if self.times_job_restarted.get(time_range, 0) < 6: + params = deep_merge( + {"time_range": {"since": job["date_start"], "until": job["date_stop"]}}, + self.request_params(stream_state=stream_state), + ) + restart_job = self._create_insights_job(params) + self.running_jobs.append(restart_job) + self.times_job_restarted[time_range] += 1 + elif job["async_status"] == "Job Failed": + raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.") + elif job["async_status"] == "Job Skipped": + raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.") if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0: raise JobTimeoutException( From 4e17fa21a5a4571cf25407e2423c8b59af615cca Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Fri, 5 Nov 2021 11:58:23 -0700 Subject: [PATCH 58/83] Bmoric/remove docker compose for build (#7500) This making the build using a gradle plugin instead of using docker-compose build. It aims to make the build to be more incremental as described in #7306 Building the docker image don't rely on docker-compose anymore. The docker build step is isolated into a dedicated folder (in order to make sure that gradle plugin don't recompute the build of the docker container) Gradle is responsible for copying the files that docker needs to build its image. That removes the need of having a dockerignore file. This might not be effective until #7539 is solved. 
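To make the new pattern concrete, here is a minimal sketch of how a module wires the generated docker task in its build.gradle; the module name "example-module" and the `bin` directory it stages are illustrative placeholders, not part of this change:

```groovy
// Minimal sketch of the per-module wiring this patch introduces (names are illustrative).
// copyDocker is declared for all projects: it resets build/docker and copies the module's Dockerfile,
// so anything else the image needs must be staged after it runs.
task copyExampleBin(type: Copy) {
    dependsOn copyDocker

    from 'bin'
    into 'build/docker/bin'
}

// getDockerBuildTask generates a DockerBuildImage task that builds airbyte/example-module:$VERSION
// (defaulting to :dev) from the isolated build/docker context.
Task dockerBuildTask = getDockerBuildTask("example-module", "$project.projectDir")
dockerBuildTask.dependsOn(copyExampleBin)

// Hooking into assemble is what lets `SUB_BUILD=PLATFORM ./gradlew assemble` build every platform image.
assemble.dependsOn(dockerBuildTask)
```

The `airbyte-config:init`, `airbyte-db:lib` and `airbyte-webapp` changes below follow this same copy-then-build shape.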
--- .bumpversion.cfg | 2 + .github/workflows/gradle.yml | 4 +- airbyte-cli/build.gradle | 6 +- airbyte-config/init/.dockerignore | 3 - airbyte-config/init/Dockerfile | 2 +- airbyte-config/init/build.gradle | 11 +++ airbyte-db/lib/.dockerignore | 2 - airbyte-db/lib/Dockerfile | 2 +- airbyte-db/lib/build.gradle | 13 ++++ airbyte-migration/.dockerignore | 3 - airbyte-migration/Dockerfile | 2 +- airbyte-migration/build.gradle | 13 ++++ .../java/io/airbyte/oauth/BaseOAuthFlow.java | 1 - airbyte-scheduler/app/.dockerignore | 3 - airbyte-scheduler/app/Dockerfile | 2 +- airbyte-scheduler/app/build.gradle | 14 ++++ airbyte-server/.dockerignore | 3 - airbyte-server/Dockerfile | 2 +- airbyte-server/build.gradle | 14 ++++ airbyte-webapp/.dockerignore | 4 - airbyte-webapp/Dockerfile | 8 +- airbyte-webapp/build.gradle | 31 +++++++- airbyte-workers/Dockerfile | 2 +- airbyte-workers/build.gradle | 14 ++++ build.gradle | 75 ++++++++++--------- docs/SUMMARY.md | 1 + .../developing-on-docker.md | 43 +++++++++++ docs/project-overview/changelog/README.md | 3 +- .../acceptance_test_kube_gke.sh | 2 +- tools/bin/release_version.sh | 2 +- 30 files changed, 210 insertions(+), 77 deletions(-) delete mode 100644 airbyte-config/init/.dockerignore delete mode 100644 airbyte-db/lib/.dockerignore delete mode 100644 airbyte-migration/.dockerignore delete mode 100644 airbyte-scheduler/app/.dockerignore delete mode 100644 airbyte-server/.dockerignore delete mode 100644 airbyte-webapp/.dockerignore create mode 100644 docs/contributing-to-airbyte/developing-on-docker.md diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5b463ca898bed..e9bda52d54a98 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -10,6 +10,8 @@ serialize = [bumpversion:file:.env] +[bumpversion:file:airbyte-migration/Dockerfile] + [bumpversion:file:airbyte-server/Dockerfile] [bumpversion:file:airbyte-workers/Dockerfile] diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 87ff147195de7..174a134cca29b 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -339,7 +339,7 @@ jobs: EOF - name: Build Platform Docker Images - run: SUB_BUILD=PLATFORM ./gradlew --no-daemon composebuild --scan + run: SUB_BUILD=PLATFORM ./gradlew --no-daemon assemble --scan - name: Run End-to-End Frontend Tests run: ./tools/bin/e2e_test.sh @@ -457,7 +457,7 @@ jobs: HOME: /home/runner - name: Build Platform Docker Images - run: SUB_BUILD=PLATFORM ./gradlew composeBuild --scan + run: SUB_BUILD=PLATFORM ./gradlew assemble --scan - name: Run Kubernetes End-to-End Acceptance Tests env: diff --git a/airbyte-cli/build.gradle b/airbyte-cli/build.gradle index 3388338f60a7a..4cccd9d4f4018 100644 --- a/airbyte-cli/build.gradle +++ b/airbyte-cli/build.gradle @@ -1,3 +1,3 @@ -plugins { - id "airbyte-docker" -} +Task dockerBuildTask = getDockerBuildTask("cli", "$project.projectDir") +dockerBuildTask.dependsOn(copyDocker) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-config/init/.dockerignore b/airbyte-config/init/.dockerignore deleted file mode 100644 index 5ad9d43099b67..0000000000000 --- a/airbyte-config/init/.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!src -!scripts diff --git a/airbyte-config/init/Dockerfile b/airbyte-config/init/Dockerfile index afba972dedf19..3144a772ef4ef 100644 --- a/airbyte-config/init/Dockerfile +++ b/airbyte-config/init/Dockerfile @@ -4,4 +4,4 @@ WORKDIR /app # the sole purpose of this image is to seed the data volume with the default data # that the app should have when it is first installed. 
-COPY scripts scripts +COPY bin/scripts scripts diff --git a/airbyte-config/init/build.gradle b/airbyte-config/init/build.gradle index c7117fd16ea78..05bfa389043cd 100644 --- a/airbyte-config/init/build.gradle +++ b/airbyte-config/init/build.gradle @@ -11,3 +11,14 @@ dependencies { implementation project(':airbyte-commons-docker') implementation project(':airbyte-json-validation') } + +task copyScripts(type: Copy) { + dependsOn copyDocker + + from('scripts') + into 'build/docker/bin/scripts' +} + +Task dockerBuildTask = getDockerBuildTask("init", "$project.projectDir") +dockerBuildTask.dependsOn(copyScripts) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-db/lib/.dockerignore b/airbyte-db/lib/.dockerignore deleted file mode 100644 index 7a1eba35d5be1..0000000000000 --- a/airbyte-db/lib/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!src diff --git a/airbyte-db/lib/Dockerfile b/airbyte-db/lib/Dockerfile index a2feafc85de83..a9f42ce521ba1 100644 --- a/airbyte-db/lib/Dockerfile +++ b/airbyte-db/lib/Dockerfile @@ -1,3 +1,3 @@ FROM postgres:13-alpine -COPY src/main/resources/init.sql /docker-entrypoint-initdb.d/000_init.sql +COPY bin/init.sql /docker-entrypoint-initdb.d/000_init.sql diff --git a/airbyte-db/lib/build.gradle b/airbyte-db/lib/build.gradle index 1d3d5dd2d3312..5be6ca9a1e8e9 100644 --- a/airbyte-db/lib/build.gradle +++ b/airbyte-db/lib/build.gradle @@ -69,3 +69,16 @@ task(dumpJobsSchema, dependsOn: 'classes', type: JavaExec) { classpath = sourceSets.main.runtimeClasspath args 'jobs', 'dump_schema' } + +task copyInitSql(type: Copy) { + dependsOn copyDocker + + from('src/main/resources') { + include 'init.sql' + } + into 'build/docker/bin' +} + +Task dockerBuildTask = getDockerBuildTask("db", "$project.projectDir") +dockerBuildTask.dependsOn(copyInitSql) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-migration/.dockerignore b/airbyte-migration/.dockerignore deleted file mode 100644 index 65c7d0ad3e73c..0000000000000 --- a/airbyte-migration/.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!Dockerfile -!build diff --git a/airbyte-migration/Dockerfile b/airbyte-migration/Dockerfile index 8a657e07ee5bb..1fe50c4560c60 100644 --- a/airbyte-migration/Dockerfile +++ b/airbyte-migration/Dockerfile @@ -6,7 +6,7 @@ ENV APPLICATION airbyte-migration WORKDIR /app # Move and run scheduler -COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar +COPY bin/${APPLICATION}-0.30.31-alpha.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 diff --git a/airbyte-migration/build.gradle b/airbyte-migration/build.gradle index ac752cf739c7d..1a9504dec522b 100644 --- a/airbyte-migration/build.gradle +++ b/airbyte-migration/build.gradle @@ -15,3 +15,16 @@ application { mainClass = 'io.airbyte.migrate.MigrationRunner' } +task copyGeneratedTar(type: Copy) { + dependsOn distTar + dependsOn copyDocker + + from('build/distributions') { + include 'airbyte-migration-*.tar' + } + into 'build/docker/bin' +} + +Task dockerBuildTask = getDockerBuildTask("migration", "$project.projectDir") +dockerBuildTask.dependsOn(copyGeneratedTar) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index ebf3346b2c0e5..9fb0a22a76250 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -26,7 +26,6 @@ import java.util.function.Function; import 
java.util.function.Supplier; import org.apache.commons.lang3.RandomStringUtils; -import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/airbyte-scheduler/app/.dockerignore b/airbyte-scheduler/app/.dockerignore deleted file mode 100644 index 65c7d0ad3e73c..0000000000000 --- a/airbyte-scheduler/app/.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!Dockerfile -!build diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index f4cb9b7d537ad..c603b3d9e8034 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app +ADD bin/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/build.gradle b/airbyte-scheduler/app/build.gradle index d40206efb3ae9..371137316379c 100644 --- a/airbyte-scheduler/app/build.gradle +++ b/airbyte-scheduler/app/build.gradle @@ -57,3 +57,17 @@ run { environment "TEMPORAL_HOST", "localhost:7233" } + +task copyGeneratedTar(type: Copy) { + dependsOn copyDocker + dependsOn distTar + + from('build/distributions') { + include 'airbyte-scheduler-*.tar' + } + into 'build/docker/bin' +} + +Task dockerBuildTask = getDockerBuildTask("scheduler", "$project.projectDir") +dockerBuildTask.dependsOn(copyGeneratedTar) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-server/.dockerignore b/airbyte-server/.dockerignore deleted file mode 100644 index 65c7d0ad3e73c..0000000000000 --- a/airbyte-server/.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!Dockerfile -!build diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index ec7aefe8c4c11..56ccd6192bcde 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app +ADD bin/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle index a38db0edc5dbe..a5ce58eb79463 100644 --- a/airbyte-server/build.gradle +++ b/airbyte-server/build.gradle @@ -127,3 +127,17 @@ run { environment "AIRBYTE_ROLE", System.getenv('AIRBYTE_ROLE') environment "TEMPORAL_HOST", "localhost:7233" } + +task copyGeneratedTar(type: Copy) { + dependsOn copyDocker + dependsOn distTar + + from('build/distributions') { + include 'airbyte-server-*.tar' + } + into 'build/docker/bin' +} + +Task dockerBuildTask = getDockerBuildTask("server", "$project.projectDir") +dockerBuildTask.dependsOn(copyGeneratedTar) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-webapp/.dockerignore b/airbyte-webapp/.dockerignore deleted file mode 100644 index b284b9daeb6c5..0000000000000 --- a/airbyte-webapp/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -* -!Dockerfile -!build -!nginx diff --git a/airbyte-webapp/Dockerfile b/airbyte-webapp/Dockerfile index e1054ff5154fa..1eead5631b6fb 100644 --- a/airbyte-webapp/Dockerfile +++ b/airbyte-webapp/Dockerfile @@ -2,8 +2,6 @@ FROM nginx:1.19-alpine as webapp EXPOSE 80 -COPY build/docs docs/ -# docs get copied twice because npm gradle plugin ignores output dir. 
-COPY build /usr/share/nginx/html -RUN rm -rf /usr/share/nginx/html/docs -COPY nginx/default.conf.template /etc/nginx/templates/default.conf.template +COPY bin/docs docs/ +COPY bin/build /usr/share/nginx/html +COPY bin/nginx/default.conf.template /etc/nginx/templates/default.conf.template diff --git a/airbyte-webapp/build.gradle b/airbyte-webapp/build.gradle index 1ea5a0ed0dbc8..886827b844947 100644 --- a/airbyte-webapp/build.gradle +++ b/airbyte-webapp/build.gradle @@ -32,11 +32,38 @@ task test(type: NpmTask) { assemble.dependsOn npm_run_build build.finalizedBy test +task copyBuild(type: Copy) { + dependsOn copyDocker + + from "${project.projectDir}/build" + into "build/docker/bin/build" + exclude ".docker" + exclude "docker" +} + task copyDocs(type: Copy) { - from "${System.getProperty("user.dir")}/docs/integrations" - into "${buildDir}/docs/integrations" + dependsOn copyDocker + + from "${project.rootProject.projectDir}/docs/integrations" + into "build/docker/bin/docs/integrations" duplicatesStrategy DuplicatesStrategy.INCLUDE } +task copyNginx(type: Copy) { + dependsOn copyDocker + + from "${project.projectDir}/nginx" + into "build/docker/bin/nginx" +} + +copyBuild.dependsOn npm_run_build +copyNginx.dependsOn npm_run_build copyDocs.dependsOn npm_run_build assemble.dependsOn copyDocs +copyDocker.dependsOn(npm_run_build) + +Task dockerBuildTask = getDockerBuildTask("webapp", "$project.projectDir") +dockerBuildTask.dependsOn(copyBuild) +dockerBuildTask.dependsOn(copyNginx) +dockerBuildTask.dependsOn(copyDocs) +assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 030bccd8f1f27..92301b8816437 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app +ADD bin/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index b1a2f9a38afd5..f461116ce2d70 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -62,3 +62,17 @@ application { mainClass = mainClassName applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] } + +task copyGeneratedTar(type: Copy) { + dependsOn copyDocker + dependsOn distTar + + from('build/distributions') { + include 'airbyte-workers-*.tar' + } + into 'build/docker/bin' +} + +Task dockerBuildTask = getDockerBuildTask("worker", "$project.projectDir") +dockerBuildTask.dependsOn(copyGeneratedTar) +assemble.dependsOn(dockerBuildTask) diff --git a/build.gradle b/build.gradle index 39d9957cd26b1..39b0b9e2f22d9 100644 --- a/build.gradle +++ b/build.gradle @@ -1,3 +1,16 @@ +import com.bmuschko.gradle.docker.tasks.image.DockerBuildImage + +buildscript { + repositories { + maven { + url "https://plugins.gradle.org/m2/" + } + } + dependencies { + classpath 'com.bmuschko:gradle-docker-plugin:7.1.0' + } +} + plugins { id 'base' id 'pmd' @@ -112,27 +125,36 @@ spotless { } check.dependsOn 'spotlessApply' +@SuppressWarnings('GroovyAssignabilityCheck') +def Task getDockerBuildTask(String artifactName, String projectDir) { + return task ("buildDockerImage-$artifactName" (type: DockerBuildImage) { + def buildTag = System.getenv('VERSION') ?: 'dev' + def buildPlatform = System.getenv('DOCKER_BUILD_PLATFORM') ?: 'linux/amd64' + def jdkVersion 
= System.getenv('JDK_VERSION') ?: '14.0.2' + def buildArch = System.getenv('DOCKER_BUILD_ARCH') ?: 'amd64' + + inputDir = file("$projectDir/build/docker") + platform = buildPlatform + images.add("airbyte/$artifactName:$buildTag") + buildArgs.put('JDK_VERSION', jdkVersion) + buildArgs.put('DOCKER_BUILD_ARCH', buildArch) + }) +} + allprojects { - apply plugin: 'base' + apply plugin: 'com.bmuschko.docker-remote-api' - afterEvaluate { project -> - def composeDeps = [ - ":airbyte-config:init", - ":airbyte-db:lib", - ":airbyte-migration", - ":airbyte-scheduler:app", - ":airbyte-workers", - ":airbyte-server", - ":airbyte-webapp", - ].toSet().asImmutable() - - if (project.getPath() in composeDeps) { - composeBuild.dependsOn(project.getPath() + ':assemble') - } + task copyDocker(type: Copy) { + delete "build/docker" + + from "${project.projectDir}/Dockerfile" + into "build/docker/" } } allprojects { + apply plugin: 'base' + // by default gradle uses directory as the project name. That works very well in a single project environment but // projects clobber each other in an environments with subprojects when projects are in directories named identically. def sub = rootDir.relativePath(projectDir.parentFile).replace('/', '.') @@ -234,6 +256,7 @@ subprojects { testImplementation 'org.junit.jupiter:junit-jupiter-params:5.7.2' testImplementation 'org.mockito:mockito-junit-jupiter:3.12.4' testImplementation 'org.assertj:assertj-core:3.21.0' + } tasks.withType(Tar) { @@ -245,28 +268,6 @@ subprojects { } } -task composeBuild { - def buildTag = System.getenv('VERSION') ?: 'dev' - def buildPlatform = System.getenv('DOCKER_BUILD_PLATFORM') ?: 'linux/amd64' - def buildArch = System.getenv('DOCKER_BUILD_ARCH') ?: 'amd64' - def jdkVersion = System.getenv('JDK_VERSION') ?: '14.0.2' - def dockerComposeFile = buildArch == 'arm64' ? 
'docker-compose.build-m1.yaml' : 'docker-compose.build.yaml'
-    doFirst {
-        exec {
-            workingDir rootDir
-            commandLine 'docker-compose', '-f', dockerComposeFile, 'build', '--parallel', '--quiet'
-            environment 'VERSION', buildTag
-            environment 'DOCKER_BUILD_PLATFORM', buildPlatform
-            environment 'DOCKER_BUILD_ARCH', buildArch
-            environment 'JDK_VERSION', jdkVersion
-        }
-    }
-}
-
-if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") {
-    build.dependsOn(composeBuild)
-}
-
 task('generate') {
     dependsOn subprojects.collect { it.getTasksByName('generateProtocolClassFiles', true) }
     dependsOn subprojects.collect { it.getTasksByName('generateJsonSchema2Pojo', true) }
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index 44cfa4f835ba0..b8040b0007076 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -197,6 +197,7 @@
 * [Contributing to Airbyte](contributing-to-airbyte/README.md)
   * [Code of Conduct](contributing-to-airbyte/code-of-conduct.md)
   * [Developing Locally](contributing-to-airbyte/developing-locally.md)
+  * [Developing on Docker](contributing-to-airbyte/developing-on-docker.md)
   * [Developing on Kubernetes](contributing-to-airbyte/developing-on-kubernetes.md)
   * [Monorepo Python Development](contributing-to-airbyte/monorepo-python-development.md)
   * [Code Style](contributing-to-airbyte/code-style.md)
diff --git a/docs/contributing-to-airbyte/developing-on-docker.md b/docs/contributing-to-airbyte/developing-on-docker.md
new file mode 100644
index 0000000000000..34f225c8b0172
--- /dev/null
+++ b/docs/contributing-to-airbyte/developing-on-docker.md
@@ -0,0 +1,43 @@
+# Developing on Docker
+
+## Incrementality
+
+The docker build is fully incremental for the platform build, which means that it will only build an image if it is needed. We need to keep it that
+way.
+A task generator, `getDockerBuildTask`, is available for building a docker image for any given module. Behind the scenes, it will generate a
+task which will run the docker image build in a dedicated folder. The goal is to make sure that we have an isolated
+context, which helps with incrementality. All files that need to be present in the docker image need to be copied into this folder. The generator
+takes 2 arguments:
+- The image name, for example if `foo` is given as an image name, the image `airbyte/foo` will be created
+- The project directory. It is needed because `getDockerBuildTask` is declared in the root project
+
+## Adding a new docker build
+
+Once you have a `Dockerfile`, generating the docker image is done in the following way:
+- specify the artifact name and the project directory,
+- make sure that the Dockerfile is properly copied to the docker context directory before building the image,
+- make the `assemble` task depend on the docker build task.
+
+For example:
+```groovy
+Task dockerBuildTask = getDockerBuildTask("cli", project.projectDir)
+dockerBuildTask.dependsOn(copyDocker)
+assemble.dependsOn(dockerBuildTask)
+```
+
+If you need to add files to your image, you need to copy them into `build/docker/bin` first. This needs to happen after the `copyDocker` task.
+The `copyDocker` task cleans up the `build/docker` folder as a first step.
+ +For example: +```groovy +task copyScripts(type: Copy) { + dependsOn copyDocker + + from('scripts') + into 'build/docker/bin/scripts' +} + +Task dockerBuildTask = getDockerBuildTask("init", project.projectDir) +dockerBuildTask.dependsOn(copyScripts) +assemble.dependsOn(dockerBuildTask) +``` diff --git a/docs/project-overview/changelog/README.md b/docs/project-overview/changelog/README.md index 86c8266c239b0..6729332c0004b 100644 --- a/docs/project-overview/changelog/README.md +++ b/docs/project-overview/changelog/README.md @@ -257,7 +257,8 @@ Airbyte is comprised of 2 parts: The "production" version of Airbyte is the version of the app specified in `.env`. With each production release, we update the version in the `.env` file. This version will always be available for download on DockerHub. It is the version of the app that runs when a user runs `docker-compose up`. -The "development" version of Airbyte is the head of master branch. It is the version of the app that runs when a user runs `./gradlew composeBuild && VERSION=dev docker compose up`. +The "development" version of Airbyte is the head of master branch. It is the version of the app that runs when a user runs `./gradlew build && +VERSION=dev docker compose up`. ### Production Release Schedule diff --git a/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh b/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh index f796e94bb5fa8..85ee8e4c517bd 100755 --- a/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh +++ b/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh @@ -14,7 +14,7 @@ TAG=$(openssl rand -hex 12) echo "Tag" $TAG docker login -u airbytebot -p $DOCKER_PASSWORD -VERSION=$TAG ./gradlew composeBuild +VERSION=$TAG ./gradlew build VERSION=$TAG docker-compose -f docker-compose.build.yaml push # For running on Mac diff --git a/tools/bin/release_version.sh b/tools/bin/release_version.sh index 5cc3d245fcd1c..1ae49cddacc13 100755 --- a/tools/bin/release_version.sh +++ b/tools/bin/release_version.sh @@ -38,7 +38,7 @@ GIT_REVISION=$(git rev-parse HEAD) echo "Bumped version from ${PREV_VERSION} to ${NEW_VERSION}" echo "Building and publishing version $NEW_VERSION for git revision $GIT_REVISION..." 
-SUB_BUILD=PLATFORM ./gradlew clean composeBuild +SUB_BUILD=PLATFORM ./gradlew clean build SUB_BUILD=PLATFORM ./gradlew publish VERSION=$NEW_VERSION GIT_REVISION=$GIT_REVISION docker-compose -f docker-compose.build.yaml build VERSION=$NEW_VERSION GIT_REVISION=$GIT_REVISION docker-compose -f docker-compose.build.yaml push From 497858d6eae66d862d3e7ffb24718676f5503506 Mon Sep 17 00:00:00 2001 From: Vadym Date: Fri, 5 Nov 2021 21:03:51 +0200 Subject: [PATCH 59/83] Source Facebook Marketing: Bump docker version (#7682) * Bump docker version --- .../e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-facebook-marketing/Dockerfile | 2 +- docs/integrations/sources/facebook-marketing.md | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json index da5565d41b5f2..798cb04b32b5a 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c", "name": "Facebook Marketing", "dockerRepository": "airbyte/source-facebook-marketing", - "dockerImageTag": "0.2.21", + "dockerImageTag": "0.2.22", "documentationUrl": "https://docs.airbyte.io/integrations/sources/facebook-marketing", "icon": "facebook.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index ef2b67389ff81..d4f0bfecf1e5b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -131,7 +131,7 @@ - name: Facebook Marketing sourceDefinitionId: e7778cfc-e97c-4458-9ecb-b4f2bba8946c dockerRepository: airbyte/source-facebook-marketing - dockerImageTag: 0.2.21 + dockerImageTag: 0.2.22 documentationUrl: https://docs.airbyte.io/integrations/sources/facebook-marketing icon: facebook.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index a7157e93bc41c..b28322703efe8 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1084,7 +1084,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-facebook-marketing:0.2.21" +- dockerImage: "airbyte/source-facebook-marketing:0.2.22" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" changelogUrl: "https://docs.airbyte.io/integrations/sources/facebook-marketing" diff --git a/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile b/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile index 16dd6edf9e90d..2a2905c3f229e 100644 --- a/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile +++ b/airbyte-integrations/connectors/source-facebook-marketing/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.21 +LABEL io.airbyte.version=0.2.22 LABEL io.airbyte.name=airbyte/source-facebook-marketing diff --git a/docs/integrations/sources/facebook-marketing.md b/docs/integrations/sources/facebook-marketing.md index 82cfa1780422f..1a6ef22e57872 100644 --- a/docs/integrations/sources/facebook-marketing.md +++ b/docs/integrations/sources/facebook-marketing.md @@ -96,6 +96,7 @@ As a summary, custom insights allows to replicate only some fields, resulting in | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.22 | 2021-11-05 | [4864](https://github.com/airbytehq/airbyte/pull/7605) | Add job retry logics to AdsInsights stream | | 0.2.21 | 2021-10-05 | [4864](https://github.com/airbytehq/airbyte/pull/4864) | Update insights streams with custom entries for fields, breakdowns and action_breakdowns | | 0.2.20 | 2021-10-04 | [6719](https://github.com/airbytehq/airbyte/pull/6719) | Update version of facebook\_bussiness package to 12.0 | | 0.2.19 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | From 1f295f24e65126db1c5d106cf415491c5754e84b Mon Sep 17 00:00:00 2001 From: vitaliizazmic <75620293+vitaliizazmic@users.noreply.github.com> Date: Fri, 5 Nov 2021 21:40:56 +0200 Subject: [PATCH 60/83] =?UTF-8?q?=F0=9F=8E=89=20Source=20Google=20Director?= =?UTF-8?q?y:=20support=20oauth?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Google Directory #6265 - add oauth support * Source Google Directory #6265 - update credentials * Source Google Directory #6265 - fixing according to PR * Source Google directory #6265 - update docs * Source Google directory #5190 - update doc * Source Google Directory #6265 - resolve merge conflict * Source Google Directory #6265 - SAT for oauth * Source Google Directory #6265 - bump version and update changelog * Source Google Directory #6265 - bump version and update changelog (update publish) --- .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + .../d19ae824-e289-4b14-995a-0632eb46d246.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../source-google-directory/Dockerfile | 2 +- .../acceptance-test-config.yml | 8 +- .../invalid_config_oauth.json | 7 ++ .../integration_tests/sample_config.json | 6 +- .../sample_config_oauth.json | 7 ++ .../source_google_directory/api.py | 41 +++++++-- .../source_google_directory/client.py | 7 +- .../source_google_directory/spec.json | 89 ++++++++++++++++--- docs/integrations/sources/google-directory.md | 16 +++- tools/bin/ci_credentials.sh | 1 + 14 files changed, 161 insertions(+), 29 deletions(-) create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json create mode 100644 airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 9d45d2dc2c2e0..b7b08bdf4d9d9 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -107,6 +107,7 @@ jobs: GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD: ${{ secrets.GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD }} GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }} GOOGLE_DIRECTORY_TEST_CREDS: ${{ 
secrets.GOOGLE_DIRECTORY_TEST_CREDS }} + GOOGLE_DIRECTORY_TEST_CREDS_OAUTH: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS_OAUTH }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }} GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }} diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index cf685295ee0af..1490808c33fe7 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -102,6 +102,7 @@ jobs: GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD: ${{ secrets.GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD }} GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }} GOOGLE_DIRECTORY_TEST_CREDS: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS }} + GOOGLE_DIRECTORY_TEST_CREDS_OAUTH: ${{ secrets.GOOGLE_DIRECTORY_TEST_CREDS_OAUTH }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS }} GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC: ${{ secrets.GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC }} GOOGLE_SHEETS_TESTS_CREDS: ${{ secrets.GOOGLE_SHEETS_TESTS_CREDS }} diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json index c15dd270d14b0..5abea4626c530 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/d19ae824-e289-4b14-995a-0632eb46d246.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "d19ae824-e289-4b14-995a-0632eb46d246", "name": "Google Directory", "dockerRepository": "airbyte/source-google-directory", - "dockerImageTag": "0.1.6", + "dockerImageTag": "0.1.8", "documentationUrl": "https://docs.airbyte.io/integrations/sources/google-directory" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index d4f0bfecf1e5b..e3ab0f83124c1 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -198,7 +198,7 @@ - name: Google Directory sourceDefinitionId: d19ae824-e289-4b14-995a-0632eb46d246 dockerRepository: airbyte/source-google-directory - dockerImageTag: 0.1.6 + dockerImageTag: 0.1.8 documentationUrl: https://docs.airbyte.io/integrations/sources/google-directory sourceType: api - name: Google Search Console diff --git a/airbyte-integrations/connectors/source-google-directory/Dockerfile b/airbyte-integrations/connectors/source-google-directory/Dockerfile index 995b550e28eba..cc6e9f2f1a690 100644 --- a/airbyte-integrations/connectors/source-google-directory/Dockerfile +++ b/airbyte-integrations/connectors/source-google-directory/Dockerfile @@ -34,5 +34,5 @@ COPY source_google_directory ./source_google_directory ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.6 +LABEL io.airbyte.version=0.1.8 LABEL io.airbyte.name=airbyte/source-google-directory diff --git a/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml index 
fb8a23bcf7aeb..8d3a0596d25d1 100644 --- a/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-google-directory/acceptance-test-config.yml @@ -7,16 +7,22 @@ tests: connection: - config_path: "secrets/config.json" status: "succeed" + - config_path: "secrets/config_oauth.json" + status: "succeed" - config_path: "integration_tests/invalid_config.json" status: "failed" + - config_path: "integration_tests/invalid_config_oauth.json" + status: "failed" discovery: - config_path: "secrets/config.json" basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" + - config_path: "secrets/config_oauth.json" + configured_catalog_path: "integration_tests/configured_catalog.json" full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" - # API returns different lastLoginTime for some users, eteg is generated on all data and also same time different + # API returns different lastLoginTime for some users, eteg is generated based on all data, so also sometime are different ignored_fields: "users": ["etag", "lastLoginTime"] diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json new file mode 100644 index 0000000000000..ade2aa38d0116 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/invalid_config_oauth.json @@ -0,0 +1,7 @@ +{ + "credentials": { + "client_id": "", + "client_secret": "", + "refresh_token": "" + } +} diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json index 361b1de29e923..229ddbd5e6043 100644 --- a/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config.json @@ -1,4 +1,6 @@ { - "credentials_json": "", - "email": "test@test.test" + "credentials": { + "credentials_json": "", + "email": "test@test.test" + } } diff --git a/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json new file mode 100644 index 0000000000000..baca9caa3c4c2 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-directory/integration_tests/sample_config_oauth.json @@ -0,0 +1,7 @@ +{ + "credentials": { + "client_id": "", + "client_secret": "", + "refresh_token": "" + } +} diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py index 8083eb3080445..f3a5ba0ab4154 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/api.py @@ -6,10 +6,12 @@ import json from abc import ABC, abstractmethod from functools import partial -from typing import Callable, Dict, Iterator, Sequence +from typing import Any, Callable, Dict, Iterator, Mapping, Sequence import backoff +from google.auth.transport.requests import Request from google.oauth2 import service_account 
+from google.oauth2.credentials import Credentials from googleapiclient.discovery import build from googleapiclient.errors import HttpError as GoogleApiHttpError @@ -19,20 +21,41 @@ class API: - def __init__(self, credentials_json: str, email: str): + def __init__(self, credentials: Mapping[str, Any]): self._creds = None - self._credentials_json = credentials_json - self._admin_email = email + self._raw_credentials = credentials self._service = None - def _load_account_info(self) -> Dict: - account_info = json.loads(self._credentials_json) + @staticmethod + def _load_account_info(credentials_json: str) -> Dict: + account_info = json.loads(credentials_json) return account_info - def _obtain_creds(self): - account_info = self._load_account_info() + def _obtain_service_account_creds(self) -> service_account.Credentials: + """Obtaining creds based on Service account scenario""" + credentials_json = self._raw_credentials.get("credentials_json") + admin_email = self._raw_credentials.get("email") + account_info = self._load_account_info(credentials_json) creds = service_account.Credentials.from_service_account_info(account_info, scopes=SCOPES) - self._creds = creds.with_subject(self._admin_email) + self._creds = creds.with_subject(admin_email) + + def _obtain_web_app_creds(self) -> Credentials: + """Obtaining creds based on Web server application scenario""" + info = { + "client_id": self._raw_credentials.get("client_id"), + "client_secret": self._raw_credentials.get("client_secret"), + "refresh_token": self._raw_credentials.get("refresh_token"), + } + creds = Credentials.from_authorized_user_info(info) + if creds.expired: + creds.refresh(Request()) + self._creds = creds + + def _obtain_creds(self): + if "credentials_json" in self._raw_credentials: + self._obtain_service_account_creds() + elif "client_id" and "client_secret" in self._raw_credentials: + self._obtain_web_app_creds() def _construct_resource(self): if not self._creds: diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py b/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py index 32b48f2007053..086e51c1077bb 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/client.py @@ -11,8 +11,11 @@ class Client(BaseClient): - def __init__(self, credentials_json: str, email: str): - self._api = API(credentials_json, email) + def __init__(self, credentials: Mapping[str, Any] = None, credentials_json: str = None, email: str = None): + # supporting old config format + if not credentials: + credentials = {"credentials_json": credentials_json, "email": email} + self._api = API(credentials) self._apis = {"users": UsersAPI(self._api), "groups": GroupsAPI(self._api), "group_members": GroupMembersAPI(self._api)} super().__init__() diff --git a/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json b/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json index c848cdde6dae0..5b59a13256374 100644 --- a/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json +++ b/airbyte-integrations/connectors/source-google-directory/source_google_directory/spec.json @@ -4,18 +4,87 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Google Directory Spec", "type": "object", - "required": ["credentials_json", "email"], - 
"additionalProperties": false, + "required": [], + "additionalProperties": true, "properties": { - "credentials_json": { - "type": "string", - "description": "The contents of the JSON service account key. See the docs for more information on how to generate this key.", - "airbyte_secret": true - }, - "email": { - "type": "string", - "description": "The email of the user, which has permissions to access the Google Workspace Admin APIs." + "credentials": { + "title": "Google Credentials", + "description": "Google APIs use the OAuth 2.0 protocol for authentication and authorization. The Source supports Web server application and Service accounts scenarios", + "type": "object", + "oneOf": [ + { + "title": "Sign in via Google (Oauth)", + "description": "For these scenario user only needs to give permission to read Google Directory data", + "type": "object", + "required": ["client_id", "client_secret", "refresh_token"], + "properties": { + "credentials_title": { + "type": "string", + "title": "Credentials title", + "description": "Authentication scenario", + "const": "Web server app", + "enum": ["Web server app"], + "default": "Web server app", + "order": 0 + }, + "client_id": { + "title": "Client ID", + "type": "string", + "description": "The client ID of developer application", + "airbyte_secret": true + }, + "client_secret": { + "title": "Client secret", + "type": "string", + "description": "The client secret of developer application", + "airbyte_secret": true + }, + "refresh_token": { + "title": "Refresh Token", + "type": "string", + "description": "The token for obtaining new access token", + "airbyte_secret": true + } + } + }, + { + "title": "Service account Key", + "description": "For these scenario user should obtain service account's credentials from the Google API Console and provide delegated email", + "type": "object", + "required": ["credentials_json", "email"], + "properties": { + "credentials_title": { + "type": "string", + "title": "Credentials title", + "description": "Authentication scenario", + "const": "Service accounts", + "enum": ["Service accounts"], + "default": "Service accounts", + "order": 0 + }, + "credentials_json": { + "type": "string", + "title": "Credentials JSON", + "description": "The contents of the JSON service account key. See the docs for more information on how to generate this key.", + "airbyte_secret": true + }, + "email": { + "type": "string", + "title": "Email", + "description": "The email of the user, which has permissions to access the Google Workspace Admin APIs." + } + } + } + ] } } + }, + "authSpecification": { + "auth_type": "oauth2.0", + "oauth2Specification": { + "rootObject": ["credentials", 0], + "oauthFlowInitParameters": [["client_id"], ["client_secret"]], + "oauthFlowOutputParameters": [["refresh_token"]] + } } } diff --git a/docs/integrations/sources/google-directory.md b/docs/integrations/sources/google-directory.md index bb52676d054df..e80608000e364 100644 --- a/docs/integrations/sources/google-directory.md +++ b/docs/integrations/sources/google-directory.md @@ -35,9 +35,19 @@ This Source is capable of syncing the following Streams: This connector attempts to back off gracefully when it hits Directory API's rate limits. To find more information about limits, see [Google Directory's Limits and Quotas](https://developers.google.com/admin-sdk/directory/v1/limits) documentation. -## Getting started +## Getting Started \(Airbyte Cloud\) -### Requirements +1. Click `OAuth2.0 authorization` then `Authenticate your Google Directory account`. +2. 
You're done. + +## Getting Started \(Airbyte Open-Source\) + +Google APIs use the OAuth 2.0 protocol for authentication and authorization. This connector supports [Web server application](https://developers.google.com/identity/protocols/oauth2#webserver) and [Service accounts](https://developers.google.com/identity/protocols/oauth2#serviceaccount) scenarios. Therefore, there are 2 options of setting up authorization for this source: + +* Use your Google account and authorize over Google's OAuth on connection setup. Select "Default OAuth2.0 authorization" from dropdown list. +* Create service account specifically for Airbyte. + +### Service account requirements * Credentials to a Google Service Account with delegated Domain Wide Authority * Email address of the workspace admin which created the Service Account @@ -58,6 +68,8 @@ You should now be ready to use the Google Directory connector in Airbyte. | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| 0.1.8 | 2021-11-02 | [7409](https://github.com/airbytehq/airbyte/pull/7409) | Support oauth (update publish) | +| 0.1.7 | 2021-11-02 | [7409](https://github.com/airbytehq/airbyte/pull/7409) | Support oauth | | 0.1.6 | 2021-11-02 | [7464](https://github.com/airbytehq/airbyte/pull/7464) | Migrate to the CDK | | 0.1.5 | 2021-10-20 | [6930](https://github.com/airbytehq/airbyte/pull/6930) | Fix crash when a group don't have members | | 0.1.4 | 2021-10-19 | [7167](https://github.com/airbytehq/airbyte/pull/7167) | Add organizations and phones to `users` schema | diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index bb5e4da501503..6936d2e3a54a3 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -86,6 +86,7 @@ write_standard_creds source-google-analytics-v4 "$GOOGLE_ANALYTICS_V4_TEST_CREDS write_standard_creds source-google-analytics-v4 "$GOOGLE_ANALYTICS_V4_TEST_CREDS_SRV_ACC" "service_config.json" write_standard_creds source-google-analytics-v4 "$GOOGLE_ANALYTICS_V4_TEST_CREDS_OLD" "old_config.json" write_standard_creds source-google-directory "$GOOGLE_DIRECTORY_TEST_CREDS" +write_standard_creds source-google-directory "$GOOGLE_DIRECTORY_TEST_CREDS_OAUTH" "config_oauth.json" write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS" write_standard_creds source-google-search-console "$GOOGLE_SEARCH_CONSOLE_CDK_TEST_CREDS_SRV_ACC" "service_account_config.json" write_standard_creds source-google-sheets "$GOOGLE_SHEETS_TESTS_CREDS" From f53fd5e66b3f800f89fc999b6b7b8d3c330026eb Mon Sep 17 00:00:00 2001 From: itaseskii Date: Fri, 5 Nov 2021 23:02:01 +0100 Subject: [PATCH 61/83] =?UTF-8?q?=F0=9F=8E=89=20New=20destination:=20Cassa?= =?UTF-8?q?ndra=20(#7186)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add cassandra destination connector * refactor and docs. 
* delete test dockerfile * revert Dockerfile rm change * refactor & fix acceptance tests & format * revert stream peek * remove get pip * add address example * improved copy and code refactor * add docker-compose and improved docs Co-authored-by: itaseski --- .../707456df-6f4f-4ced-b5c6-03f73bcad1c5.json | 7 + .../BufferedStreamConsumer.java | 1 - .../DestinationAcceptanceTest.java | 1 - airbyte-integrations/builds.md | 1 + .../destination-cassandra/.dockerignore | 3 + .../destination-cassandra/Dockerfile | 11 ++ .../destination-cassandra/README.md | 68 +++++++ .../destination-cassandra/bootstrap.md | 30 +++ .../destination-cassandra/build.gradle | 33 ++++ .../destination-cassandra/docker-compose.yml | 24 +++ .../sample_secrets/config.json | 4 + .../cassandra/CassandraConfig.java | 114 +++++++++++ .../cassandra/CassandraCqlProvider.java | 179 ++++++++++++++++++ .../cassandra/CassandraDestination.java | 64 +++++++ .../cassandra/CassandraMessageConsumer.java | 109 +++++++++++ .../cassandra/CassandraNameTransformer.java | 42 ++++ .../cassandra/CassandraRecord.java | 49 +++++ .../cassandra/CassandraStreamConfig.java | 58 ++++++ .../destination/cassandra/SessionManager.java | 66 +++++++ .../destination/cassandra/Tuple.java | 38 ++++ .../src/main/resources/spec.json | 65 +++++++ .../CassandraContainerInitializr.java | 34 ++++ .../cassandra/CassandraCqlProviderIT.java | 135 +++++++++++++ .../CassandraDestinationAcceptanceTest.java | 92 +++++++++ .../cassandra/CassandraDestinationIT.java | 57 ++++++ .../cassandra/CassandraMessageConsumerIT.java | 130 +++++++++++++ .../cassandra/TestDataFactory.java | 77 ++++++++ .../cassandra/CassandraConfigTest.java | 40 ++++ .../CassandraNameTransformerTest.java | 64 +++++++ .../cassandra/TestDataFactory.java | 77 ++++++++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/destinations/cassandra.md | 49 +++++ 33 files changed, 1722 insertions(+), 2 deletions(-) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json create mode 100644 airbyte-integrations/connectors/destination-cassandra/.dockerignore create mode 100644 airbyte-integrations/connectors/destination-cassandra/Dockerfile create mode 100644 airbyte-integrations/connectors/destination-cassandra/README.md create mode 100644 airbyte-integrations/connectors/destination-cassandra/bootstrap.md create mode 100644 airbyte-integrations/connectors/destination-cassandra/build.gradle create mode 100644 airbyte-integrations/connectors/destination-cassandra/docker-compose.yml create mode 100644 airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java create mode 100644 
airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java create mode 100644 airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java create mode 100644 docs/integrations/destinations/cassandra.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json new file mode 100644 index 0000000000000..52e0f38dddfb7 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/707456df-6f4f-4ced-b5c6-03f73bcad1c5.json @@ -0,0 +1,7 @@ +{ + "destinationDefinitionId": "707456df-6f4f-4ced-b5c6-03f73bcad1c5", + "name": "Cassandra", + "dockerRepository": "airbyte/destination-cassandra", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/cassandra" +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java index b5ab459de8869..cb31fc19ad917 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java +++ 
b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java @@ -128,7 +128,6 @@ protected void startTracked() throws Exception { @Override protected void acceptTracked(final AirbyteMessage message) throws Exception { Preconditions.checkState(hasStarted, "Cannot accept records until consumer has started"); - if (message.getType() == Type.RECORD) { final AirbyteRecordMessage recordMessage = message.getRecord(); final AirbyteStreamNameNamespacePair stream = AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage); diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java index 2959aab70089a..8c12d911d7700 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java @@ -496,7 +496,6 @@ public void testIncrementalSync() throws Exception { .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); final JsonNode config = getConfig(); runSyncAndVerifyStateOutput(config, firstSyncMessages, configuredCatalog, false); - final List secondSyncMessages = Lists.newArrayList( new AirbyteMessage() .withType(Type.RECORD) diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 00a87e0827684..c9d1010865a9a 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -112,3 +112,4 @@ | Redshift | [![destination-redshift](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-redshift%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-redshift) | | S3 | [![destination-s3](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-s3%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-s3) | | Snowflake | [![destination-snowflake](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-snowflake%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-snowflake) | +| Cassandra | [![destination-cassandra](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-cassandra%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-cassandra) | diff --git a/airbyte-integrations/connectors/destination-cassandra/.dockerignore b/airbyte-integrations/connectors/destination-cassandra/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-cassandra/Dockerfile b/airbyte-integrations/connectors/destination-cassandra/Dockerfile new file mode 100644 index 0000000000000..197bb25ec9fc4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/Dockerfile @@ -0,0 +1,11 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte +ENV APPLICATION 
destination-cassandra + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-cassandra diff --git a/airbyte-integrations/connectors/destination-cassandra/README.md b/airbyte-integrations/connectors/destination-cassandra/README.md new file mode 100644 index 0000000000000..5e5237291eabe --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/README.md @@ -0,0 +1,68 @@ +# Destination Cassandra + +This is the repository for the Cassandra destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/cassandra). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-cassandra:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-cassandra:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/cassandra`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/cassandraDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. 
Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/destination-cassandra/bootstrap.md b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md new file mode 100644 index 0000000000000..35c19425c395f --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md @@ -0,0 +1,30 @@ +# Cassandra Destination + +Cassandra is a free and open-source, distributed, wide-column store, NoSQL database management system designed to handle +large amounts of data across many commodity servers, providing high availability with no single point of failure + +The data is structured in keyspaces and tables and is partitioned and replicated across different nodes in the +cluster. +[Read more about Cassandra](https://cassandra.apache.org/_/index.html) + +This connector maps an incoming `stream` to a Cassandra `table` and a `namespace` to a Cassandra`keyspace`. +When using destination sync mode `append` and `append_dedup`, an `insert` operation is performed against an existing +Cassandra table. +When using `overwrite`, the records are first placed in a temp table. When all the messages have been received the data +is copied to the final table which is first truncated and the temp table is deleted. + +The Implementation uses the [Datastax](https://github.com/datastax/java-driver) driver in order to access +Cassandra. [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java) +handles the communication with the Cassandra cluster and internally it uses +the [SessionManager](./src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java) to retrieve a +CqlSession to the cluster. + +The [CassandraMessageConsumer](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java) +class contains the logic for handling airbyte messages, events and copying data between tables. + +## Development + +See the [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java) +class on how to use the datastax driver. 
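As a quick orientation, the sketch below shows the kind of DataStax driver calls the provider wraps: build a `CqlSession`, create a keyspace and table if they are missing, then insert one JSON record. It is a standalone illustration only, not code that ships with the connector; the contact point, datacenter, credentials, keyspace, and column names are placeholders, and the table layout (id, JSON payload, timestamp) merely mirrors the connector's raw-table shape.

```
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.type.DataTypes;
import com.datastax.oss.driver.api.core.uuid.Uuids;
import com.datastax.oss.driver.api.querybuilder.QueryBuilder;
import com.datastax.oss.driver.api.querybuilder.SchemaBuilder;
import java.net.InetSocketAddress;

public class CassandraQuickstart {

  public static void main(String[] args) {
    // Placeholder connection settings; adjust to your cluster.
    try (CqlSession session = CqlSession.builder()
        .addContactPoint(new InetSocketAddress("localhost", 9042))
        .withLocalDatacenter("datacenter1")
        .withAuthCredentials("cassandra", "cassandra")
        .build()) {

      // Create the keyspace with a simple replication strategy if it does not exist yet.
      session.execute(SchemaBuilder.createKeyspace("demo_keyspace")
          .ifNotExists()
          .withSimpleStrategy(1)
          .build());

      // Create a table with an id partition key, a JSON text column and a timestamp column.
      session.execute(SchemaBuilder.createTable("demo_keyspace", "demo_table")
          .ifNotExists()
          .withPartitionKey("id", DataTypes.UUID)
          .withColumn("data", DataTypes.TEXT)
          .withColumn("emitted_at", DataTypes.TIMESTAMP)
          .build());

      // Insert a single record serialized as JSON text, timestamped server-side.
      session.execute(QueryBuilder.insertInto("demo_keyspace", "demo_table")
          .value("id", QueryBuilder.literal(Uuids.random()))
          .value("data", QueryBuilder.literal("{\"greeting\":\"hello\"}"))
          .value("emitted_at", QueryBuilder.toTimestamp(QueryBuilder.now()))
          .build());
    }
  }
}
```

In the connector itself these calls are issued through a session obtained from the SessionManager, so that one `CqlSession` is reused per configuration instead of being opened per operation.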
+ +[Datastax docs.](https://docs.datastax.com/en/developer/java-driver/3.0/) \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cassandra/build.gradle b/airbyte-integrations/connectors/destination-cassandra/build.gradle new file mode 100644 index 0000000000000..e3c4e6ce133c7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/build.gradle @@ -0,0 +1,33 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.cassandra.CassandraDestination' +} + +def cassandraDriver = '4.13.0' +def testContainersVersion = '1.16.0' +def assertVersion = '3.21.0' + +dependencies { + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:bases:base-java') + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + implementation "com.datastax.oss:java-driver-core:${cassandraDriver}" + implementation "com.datastax.oss:java-driver-query-builder:${cassandraDriver}" + implementation "com.datastax.oss:java-driver-mapper-runtime:${cassandraDriver}" + + + // https://mvnrepository.com/artifact/org.assertj/assertj-core + testImplementation "org.assertj:assertj-core:${assertVersion}" + testImplementation "org.testcontainers:cassandra:${testContainersVersion}" + + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-cassandra') +} diff --git a/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml new file mode 100644 index 0000000000000..75090b3b59cae --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml @@ -0,0 +1,24 @@ +version: '3.7' + +services: + cassandra1: + image: cassandra:4.0 + ports: + - "9042:9042" + environment: + - "MAX_HEAP_SIZE=2048M" + - "HEAP_NEWSIZE=1024M" + - "CASSANDRA_CLUSTER_NAME=cassandra_cluster" + +# Uncomment if you want to run a Cassandra cluster +# cassandra2: +# image: cassandra:4.0 +# ports: +# - "9043:9042" +# environment: +# - "MAX_HEAP_SIZE=2048M" +# - "HEAP_NEWSIZE=1024M" +# - "CASSANDRA_SEEDS=cassandra1" +# - "CASSANDRA_CLUSTER_NAME=cassandra_cluster" +# depends_on: +# - cassandra1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json new file mode 100644 index 0000000000000..644fd54c1ab92 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json @@ -0,0 +1,4 @@ +{ + "username": "paste-username-here", + "password": "paste-password-here" +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java new file mode 100644 index 0000000000000..62a80f3b78036 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Objects; + +/* + * Immutable configuration class for storing cassandra related config. + */ +class CassandraConfig { + + private final String keyspace; + + private final String username; + + private final String password; + + private final String address; + + private final int port; + + private final String datacenter; + + private final int replication; + + public CassandraConfig(String keyspace, + String username, + String password, + String address, + int port, + String datacenter, + int replication) { + this.keyspace = keyspace; + this.username = username; + this.password = password; + this.address = address; + this.port = port; + this.datacenter = datacenter; + this.replication = replication; + } + + public CassandraConfig(JsonNode config) { + this.keyspace = config.get("keyspace").asText(); + this.username = config.get("username").asText(); + this.password = config.get("password").asText(); + this.address = config.get("address").asText(); + this.port = config.get("port").asInt(9042); + this.datacenter = config.get("datacenter").asText("datacenter1"); + this.replication = config.get("replication").asInt(1); + } + + public String getKeyspace() { + return keyspace; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + public String getAddress() { + return address; + } + + public int getPort() { + return port; + } + + public String getDatacenter() { + return datacenter; + } + + public int getReplication() { + return replication; + } + + @Override + public String toString() { + return "CassandraConfig{" + + "keyspace='" + keyspace + '\'' + + ", username='" + username + '\'' + + ", password='" + password + '\'' + + ", address='" + address + '\'' + + ", port=" + port + + ", datacenter='" + datacenter + '\'' + + ", replication=" + replication + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CassandraConfig that = (CassandraConfig) o; + return port == that.port && username.equals(that.username) && password.equals(that.password) && + address.equals(that.address) && datacenter.equals(that.datacenter); + } + + @Override + public int hashCode() { + return Objects.hash(username, password, address, port, datacenter); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java new file mode 100644 index 0000000000000..19ed9d1267c93 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.now; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.BatchStatement; +import com.datastax.oss.driver.api.core.cql.BatchType; +import com.datastax.oss.driver.api.core.cql.BoundStatement; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.metadata.TokenMap; +import com.datastax.oss.driver.api.core.type.DataTypes; +import com.datastax.oss.driver.api.core.uuid.Uuids; +import com.datastax.oss.driver.api.querybuilder.QueryBuilder; +import com.datastax.oss.driver.api.querybuilder.SchemaBuilder; +import io.airbyte.integrations.base.JavaBaseConstants; +import java.io.Closeable; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraCqlProvider implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraCqlProvider.class); + + private static final int N_THREADS = Runtime.getRuntime().availableProcessors(); + + private final ExecutorService executorService; + + private final CqlSession cqlSession; + + private final CassandraConfig cassandraConfig; + + private final String columnId; + + private final String columnData; + + private final String columnTimestamp; + + public CassandraCqlProvider(CassandraConfig cassandraConfig) { + this.cassandraConfig = cassandraConfig; + this.cqlSession = SessionManager.initSession(cassandraConfig); + var nameTransformer = new CassandraNameTransformer(cassandraConfig); + this.columnId = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_AB_ID); + this.columnData = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_DATA); + this.columnTimestamp = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + this.executorService = Executors.newFixedThreadPool(N_THREADS); + } + + public void createKeySpaceIfNotExists(String keyspace, int replicationFactor) { + var query = SchemaBuilder.createKeyspace(keyspace) + .ifNotExists() + .withSimpleStrategy(replicationFactor) + .build(); + cqlSession.execute(query); + } + + public void createTableIfNotExists(String keyspace, String tableName) { + var query = SchemaBuilder.createTable(keyspace, tableName) + .ifNotExists() + .withPartitionKey(columnId, DataTypes.UUID) + .withColumn(columnData, DataTypes.TEXT) + .withColumn(columnTimestamp, DataTypes.TIMESTAMP) + .build(); + cqlSession.execute(query); + } + + public void dropTableIfExists(String keyspace, String tableName) { + var query = SchemaBuilder.dropTable(keyspace, tableName) + .ifExists() + .build(); + cqlSession.execute(query); + } + + public void insert(String keyspace, String tableName, String jsonData) { + var query = QueryBuilder.insertInto(keyspace, tableName) + .value(columnId, QueryBuilder.literal(Uuids.random())) + .value(columnData, QueryBuilder.literal(jsonData)) + .value(columnTimestamp, QueryBuilder.toTimestamp(now())) + .build(); + cqlSession.execute(query); + } + + public void truncate(String keyspace, String tableName) { + var query = QueryBuilder.truncate(keyspace, tableName).build(); + cqlSession.execute(query); + } + + public List select(String keyspace, 
String tableName) { + var query = QueryBuilder.selectFrom(keyspace, tableName) + .columns(columnId, columnData, columnTimestamp) + .build(); + return cqlSession.execute(query) + .map(result -> new CassandraRecord( + result.get(columnId, UUID.class), + result.get(columnData, String.class), + result.get(columnTimestamp, Instant.class))) + .all(); + } + + public List>> retrieveMetadata() { + return cqlSession.getMetadata().getKeyspaces().values().stream() + .map(keyspace -> Tuple.of(keyspace.getName().toString(), keyspace.getTables().values() + .stream() + .map(table -> table.getName().toString()) + .collect(Collectors.toList()))) + .collect(Collectors.toList()); + } + + public void copy(String keyspace, String sourceTable, String destinationTable) { + var select = String.format("SELECT * FROM %s.%s WHERE token(%s) > ? AND token(%s) <= ?", + keyspace, sourceTable, columnId, columnId); + + var selectStatement = cqlSession.prepare(select); + + var insert = String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (?, ?, ?)", + keyspace, destinationTable, columnId, columnData, columnTimestamp); + + var insertStatement = cqlSession.prepare(insert); + + // perform full table scan in parallel using token ranges + // optimal for copying large amounts of data + cqlSession.getMetadata().getTokenMap() + .map(TokenMap::getTokenRanges) + .orElseThrow(IllegalStateException::new) + .stream() + .flatMap(range -> range.unwrap().stream()) + .map(range -> selectStatement.bind(range.getStart(), range.getEnd())) + // explore datastax 4.x async api as an alternative for async processing + .map(selectBoundStatement -> executorService.submit(() -> batchInsert(selectBoundStatement, insertStatement))) + .forEach(this::awaitThread); + + } + + private void batchInsert(BoundStatement select, PreparedStatement insert) { + // unlogged removes the log record for increased insert speed + var batchStatement = BatchStatement.builder(BatchType.UNLOGGED); + + cqlSession.execute(select).all().stream() + .map(r -> CassandraRecord.of( + r.get(columnId, UUID.class), + r.get(columnData, String.class), + r.get(columnTimestamp, Instant.class))) + .map(r -> insert.bind(r.getId(), r.getData(), r.getTimestamp())) + .forEach(batchStatement::addStatement); + + cqlSession.execute(batchStatement.build()); + } + + private void awaitThread(Future future) { + try { + future.get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOGGER.error("Interrupted thread while copying data with reason: ", e); + } catch (ExecutionException e) { + LOGGER.error("Error while copying data with reason: ", e); + } + } + + @Override + public void close() { + // wait for tasks completion and terminate executor gracefully + executorService.shutdown(); + // close cassandra session for the given config + SessionManager.closeSession(cassandraConfig); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java new file mode 100644 index 0000000000000..90fb821b7477a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraDestination.class); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new CassandraDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + var cassandraConfig = new CassandraConfig(config); + // add random uuid to avoid conflicts with existing tables. + String tableName = "table_" + UUID.randomUUID().toString().replace("-", ""); + CassandraCqlProvider cassandraCqlProvider = null; + try { + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + // check connection and write permissions + cassandraCqlProvider.createKeySpaceIfNotExists(cassandraConfig.getKeyspace(), + cassandraConfig.getReplication()); + cassandraCqlProvider.createTableIfNotExists(cassandraConfig.getKeyspace(), tableName); + cassandraCqlProvider.insert(cassandraConfig.getKeyspace(), tableName, "{}"); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.error("Can't establish Cassandra connection with reason: ", e); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED); + } finally { + if (cassandraCqlProvider != null) { + try { + cassandraCqlProvider.dropTableIfExists(cassandraConfig.getKeyspace(), tableName); + } catch (Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", tableName, e); + } + cassandraCqlProvider.close(); + } + } + } + + @Override + public AirbyteMessageConsumer getConsumer(JsonNode config, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + return new CassandraMessageConsumer(new CassandraConfig(config), configuredCatalog, outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java new file mode 100644 index 0000000000000..5eb6f8b9f0036 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraMessageConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraMessageConsumer.class); + + private final CassandraConfig cassandraConfig; + + private final Consumer outputRecordCollector; + + private final Map cassandraStreams; + + private final CassandraCqlProvider cassandraCqlProvider; + + private AirbyteMessage lastMessage = null; + + public CassandraMessageConsumer(CassandraConfig cassandraConfig, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + this.cassandraConfig = cassandraConfig; + this.outputRecordCollector = outputRecordCollector; + this.cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + var nameTransformer = new CassandraNameTransformer(cassandraConfig); + this.cassandraStreams = configuredCatalog.getStreams().stream() + .collect(Collectors.toUnmodifiableMap( + AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, + k -> new CassandraStreamConfig( + nameTransformer.outputKeyspace(k.getStream().getNamespace()), + nameTransformer.outputTable(k.getStream().getName()), + nameTransformer.outputTmpTable(k.getStream().getName()), + k.getDestinationSyncMode()))); + } + + @Override + protected void startTracked() { + cassandraStreams.forEach((k, v) -> { + cassandraCqlProvider.createKeySpaceIfNotExists(v.getKeyspace(), cassandraConfig.getReplication()); + cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTempTableName()); + }); + } + + @Override + protected void acceptTracked(AirbyteMessage message) { + if (message.getType() == AirbyteMessage.Type.RECORD) { + var messageRecord = message.getRecord(); + var streamConfig = + cassandraStreams.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord)); + if (streamConfig == null) { + throw new IllegalArgumentException("Unrecognized destination stream"); + } + var data = Jsons.serialize(messageRecord.getData()); + cassandraCqlProvider.insert(streamConfig.getKeyspace(), streamConfig.getTempTableName(), data); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + this.lastMessage = message; + } else { + LOGGER.warn("Unsupported airbyte message type: {}", message.getType()); + } + } + + @Override + protected void close(boolean hasFailed) { + if (!hasFailed) { + cassandraStreams.forEach((k, v) -> { + try { + cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTableName()); + switch (v.getDestinationSyncMode()) { + case APPEND -> { + cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + case OVERWRITE -> { + cassandraCqlProvider.truncate(v.getKeyspace(), v.getTableName()); + cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + default -> throw new UnsupportedOperationException(); + } + } catch (Exception e) { + LOGGER.error("Error while copying data to table {}: : ", v.getTableName(), e); + } + }); + outputRecordCollector.accept(lastMessage); + } + + 
cassandraStreams.forEach((k, v) -> { + try { + cassandraCqlProvider.dropTableIfExists(v.getKeyspace(), v.getTempTableName()); + } catch (Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", v.getTempTableName(), e); + } + }); + cassandraCqlProvider.close(); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java new file mode 100644 index 0000000000000..791f6bd50de8c --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.google.common.base.CharMatcher; +import io.airbyte.commons.text.Names; +import io.airbyte.integrations.destination.StandardNameTransformer; + +class CassandraNameTransformer extends StandardNameTransformer { + + private final CassandraConfig cassandraConfig; + + public CassandraNameTransformer(CassandraConfig cassandraConfig) { + this.cassandraConfig = cassandraConfig; + } + + String outputKeyspace(String namespace) { + if (namespace == null || namespace.isBlank()) { + return cassandraConfig.getKeyspace(); + } + return CharMatcher.is('_').trimLeadingFrom(Names.toAlphanumericAndUnderscore(namespace)); + } + + String outputTable(String streamName) { + var tableName = super.getRawTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a cassandra table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputTmpTable(String streamName) { + var tableName = super.getTmpTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a cassandra table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputColumn(String columnName) { + return Names.doubleQuote(columnName.toLowerCase()); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java new file mode 100644 index 0000000000000..b30ef015d8378 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import java.time.Instant; +import java.util.UUID; + +class CassandraRecord { + + private final UUID id; + + private final String data; + + private final Instant timestamp; + + public CassandraRecord(UUID id, String data, Instant timestamp) { + this.id = id; + this.data = data; + this.timestamp = timestamp; + } + + static CassandraRecord of(UUID id, String data, Instant timestamp) { + return new CassandraRecord(id, data, timestamp); + } + + public UUID getId() { + return id; + } + + public String getData() { + return data; + } + + public Instant getTimestamp() { + return timestamp; + } + + @Override + public String toString() { + return "CassandraRecord{" + + "id=" + id + + ", data='" + data + '\'' + + ", timestamp=" + timestamp + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java new file mode 100644 index 0000000000000..88a6334227985 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.protocol.models.DestinationSyncMode; + +/* + * Immutable configuration class for storing destination stream config. + */ +class CassandraStreamConfig { + + private final String keyspace; + + private final String tableName; + + private final String tempTableName; + + private final DestinationSyncMode destinationSyncMode; + + public CassandraStreamConfig(String keyspace, + String tableName, + String tempTableName, + DestinationSyncMode destinationSyncMode) { + this.keyspace = keyspace; + this.tableName = tableName; + this.tempTableName = tempTableName; + this.destinationSyncMode = destinationSyncMode; + } + + public String getKeyspace() { + return keyspace; + } + + public String getTableName() { + return tableName; + } + + public String getTempTableName() { + return tempTableName; + } + + public DestinationSyncMode getDestinationSyncMode() { + return destinationSyncMode; + } + + @Override + public String toString() { + return "CassandraStreamConfig{" + + "keyspace='" + keyspace + '\'' + + ", tableName='" + tableName + '\'' + + ", tempTableName='" + tempTableName + '\'' + + ", destinationSyncMode=" + destinationSyncMode + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java new file mode 100644 index 0000000000000..43dc86f4fe83d --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.datastax.oss.driver.api.core.CqlSession; +import java.net.InetSocketAddress; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +class SessionManager { + + // AtomicInteger is used for convenience, this class is not thread safe + // and needs additional synchronization for that. + private static final ConcurrentHashMap> sessions; + + static { + sessions = new ConcurrentHashMap<>(); + } + + private SessionManager() { + + } + + /* + * CqlSession objects are heavyweight and can hold several tcp connections to the Cassandra cluster, + * for that reason it is better if sessions are reused per configuration. Sessions are thread-safe + * and can be accessed from different threads. + * + */ + public static CqlSession initSession(CassandraConfig cassandraConfig) { + var cachedSession = sessions.get(cassandraConfig); + if (cachedSession != null) { + cachedSession.value2().incrementAndGet(); + return cachedSession.value1(); + } else { + var session = CqlSession.builder() + .withLocalDatacenter(cassandraConfig.getDatacenter()) + .addContactPoint(new InetSocketAddress(cassandraConfig.getAddress(), cassandraConfig.getPort())) + .withAuthCredentials(cassandraConfig.getUsername(), cassandraConfig.getPassword()) + .build(); + sessions.put(cassandraConfig, Tuple.of(session, new AtomicInteger(1))); + return session; + } + } + + /* + * Close session configured with cassandra config. if the session is being used by more than one + * external instance only decrease the usage count, otherwise close the session and remove it from + * the map. + * + */ + public static void closeSession(CassandraConfig cassandraConfig) { + var cachedSession = sessions.get(cassandraConfig); + if (cachedSession == null) { + throw new IllegalStateException("No session for the provided config"); + } + int count = cachedSession.value2().decrementAndGet(); + if (count < 1) { + cachedSession.value1().close(); + sessions.remove(cassandraConfig); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java new file mode 100644 index 0000000000000..8968138a43535 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +public class Tuple { + + private final V1 value1; + + private final V2 value2; + + public Tuple(V1 value1, V2 value2) { + this.value1 = value1; + this.value2 = value2; + } + + public static Tuple of(V1 value1, V2 value2) { + return new Tuple<>(value1, value2); + } + + public V1 value1() { + return value1; + } + + public V2 value2() { + return value2; + } + + @Override + public String toString() { + return "Tuple{" + + "value1=" + value1 + + ", value2=" + value2 + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json new file mode 100644 index 0000000000000..61e3c0a7ab727 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json @@ -0,0 +1,65 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/cassandra", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Cassandra Destination Spec", + "type": "object", + "required": ["keyspace", "username", "password", "address", "port"], + "additionalProperties": true, + "properties": { + "keyspace": { + "title": "Keyspace", + "description": "Default Cassandra keyspace to create data in.", + "type": "string", + "order": 0 + }, + "username": { + "title": "Username", + "description": "Username to use to access Cassandra.", + "type": "string", + "order": 1 + }, + "password": { + "title": "Password", + "description": "Password associated with Cassandra.", + "type": "string", + "airbyte_secret": true, + "order": 2 + }, + "address": { + "title": "Address", + "description": "Address to connect to.", + "type": "string", + "examples": ["localhost,127.0.0.1"], + "order": 3 + }, + "port": { + "title": "Port", + "description": "Port of Cassandra.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 9042, + "order": 4 + }, + "datacenter": { + "title": "Datacenter", + "description": "Datacenter of the cassandra cluster.", + "type": "string", + "default": "datacenter1", + "order": 5 + }, + "replication": { + "title": "Replication factor", + "type": "integer", + "description": "Indicates to how many nodes the data should be replicated to.", + "default": 1, + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java new file mode 100644 index 0000000000000..145a8f89da976 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import org.testcontainers.containers.CassandraContainer; + +class CassandraContainerInitializr { + + private static ConfiguredCassandraContainer cassandraContainer; + + private CassandraContainerInitializr() { + + } + + public static ConfiguredCassandraContainer initContainer() { + if (cassandraContainer == null) { + cassandraContainer = new ConfiguredCassandraContainer(); + } + cassandraContainer.start(); + return cassandraContainer; + } + + public static class ConfiguredCassandraContainer extends CassandraContainer { + + ConfiguredCassandraContainer() { + // latest compatible version with the internal testcontainers datastax driver. + super("cassandra:3.11.11"); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java new file mode 100644 index 0000000000000..b5e38367510d9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraCqlProviderIT { + + private static final String CASSANDRA_KEYSPACE = "cassandra_keyspace"; + + private static final String CASSANDRA_TABLE = "cassandra_table"; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer nameTransformer; + + @BeforeAll + void setup() { + var cassandraContainer = CassandraContainerInitializr.initContainer(); + var cassandraConfig = TestDataFactory.createCassandraConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + this.cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + this.nameTransformer = new CassandraNameTransformer(cassandraConfig); + cassandraCqlProvider.createKeySpaceIfNotExists(CASSANDRA_KEYSPACE, 1); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + } + + @AfterEach + void clean() { + cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + } + + @Test + void testCreateKeySpaceIfNotExists() { + String keyspace = nameTransformer.outputKeyspace("test_keyspace"); + assertDoesNotThrow(() -> cassandraCqlProvider.createKeySpaceIfNotExists(keyspace, 1)); + } + + @Test + void testCreateTableIfNotExists() { + String table = nameTransformer.outputTable("test_stream"); + assertDoesNotThrow(() -> cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table)); + } + + @Test + void testInsert() { + // given + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}"); + 
cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}"); + + // when + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")); + + } + + @Test + void testTruncate() { + // given + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}"); + + // when + cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .isEmpty(); + } + + @Test + void testDropTableIfExists() { + // given + String table = nameTransformer.outputTmpTable("test_stream"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table); + + // when + cassandraCqlProvider.dropTableIfExists(CASSANDRA_KEYSPACE, table); + + // then + assertThrows(InvalidQueryException.class, () -> cassandraCqlProvider.select(CASSANDRA_KEYSPACE, table)); + } + + @Test + void testCopy() { + // given + String tmpTable = nameTransformer.outputTmpTable("test_stream_copy"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, tmpTable); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data3\"}"); + + String rawTable = nameTransformer.outputTable("test_stream_copy"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, rawTable); + + // when + cassandraCqlProvider.copy(CASSANDRA_KEYSPACE, tmpTable, rawTable); + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, rawTable); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..83e7846c1ef2f --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CassandraDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraDestinationAcceptanceTest.class); + + private JsonNode configJson; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer cassandraNameTransformer; + + private static CassandraContainerInitializr.ConfiguredCassandraContainer cassandraContainer; + + @BeforeAll + static void initContainer() { + cassandraContainer = CassandraContainerInitializr.initContainer(); + } + + @Override + protected void setup(TestDestinationEnv testEnv) { + configJson = TestDataFactory.createJsonConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + var cassandraConfig = new CassandraConfig(configJson); + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + cassandraCqlProvider.retrieveMetadata().forEach(meta -> { + var keyspace = meta.value1(); + meta.value2().forEach(table -> cassandraCqlProvider.truncate(keyspace, table)); + }); + } + + @Override + protected String getImageName() { + return "airbyte/destination-cassandra:dev"; + } + + @Override + protected JsonNode getConfig() { + return configJson; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected JsonNode getFailCheckConfig() { + return TestDataFactory.createJsonConfig( + "usr", + "pw", + "127.0.192.1", + 8080); + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) { + var keyspace = cassandraNameTransformer.outputKeyspace(namespace); + var table = cassandraNameTransformer.outputTable(streamName); + return cassandraCqlProvider.select(keyspace, table).stream() + .sorted(Comparator.comparing(CassandraRecord::getTimestamp)) + .map(CassandraRecord::getData) + .map(Jsons::deserialize) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java new file mode 100644 index 0000000000000..715900d9555bf --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.integrations.destination.cassandra.CassandraContainerInitializr.ConfiguredCassandraContainer; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraDestinationIT { + + private CassandraDestination cassandraDestination; + + private ConfiguredCassandraContainer cassandraContainer; + + @BeforeAll + void setup() { + this.cassandraContainer = CassandraContainerInitializr.initContainer(); + this.cassandraDestination = new CassandraDestination(); + } + + @Test + void testCheckWithStatusSucceeded() { + + var jsonConfiguration = TestDataFactory.createJsonConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + + var connectionStatus = cassandraDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED); + } + + @Test + void testCheckWithStatusFailed() { + + var jsonConfiguration = TestDataFactory.createJsonConfig( + "usr", + "pw", + "192.0.2.1", + 8080); + + var connectionStatus = cassandraDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java new file mode 100644 index 0000000000000..6e065affcde86 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.util.function.Function; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +@TestMethodOrder(OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraMessageConsumerIT { + + private static final String AIRBYTE_NAMESPACE_1 = "airbyte_namespace_1"; + private static final String AIRBYTE_NAMESPACE_2 = "airbyte_namespace_2"; + + private static final String AIRBYTE_STREAM_1 = "airbyte_stream_1"; + private static final String AIRBYTE_STREAM_2 = "airbyte_stream_2"; + + private CassandraMessageConsumer cassandraMessageConsumer; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer nameTransformer; + + @BeforeAll + void setup() { + var cassandraContainer = CassandraContainerInitializr.initContainer(); + var cassandraConfig = TestDataFactory.createCassandraConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + + var stream1 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1); + var stream2 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2); + + var cStream1 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.APPEND, stream1); + var cStream2 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.OVERWRITE, stream2); + + var catalog = TestDataFactory.createConfiguredAirbyteCatalog(cStream1, cStream2); + + cassandraMessageConsumer = new CassandraMessageConsumer(cassandraConfig, catalog, message -> {}); + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + nameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Test + @Order(1) + void testStartTracked() { + + assertDoesNotThrow(() -> cassandraMessageConsumer.startTracked()); + + } + + @Test + @Order(2) + void testAcceptTracked() { + + Function function = + data -> Jsons.jsonNode(ImmutableMap.builder().put("property", data).build()); + + assertDoesNotThrow(() -> { + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1, + function.apply("data1"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1, + function.apply("data2"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data3"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data4"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.STATE, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + 
function.apply("data5"))); + }); + + } + + @Test + @Order(3) + void testClose() { + + assertDoesNotThrow(() -> cassandraMessageConsumer.close(false)); + + } + + @Test + @Order(4) + void testFinalState() { + + var keyspace1 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_1); + var keyspace2 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_2); + var table1 = nameTransformer.outputTable(AIRBYTE_STREAM_1); + var table2 = nameTransformer.outputTable(AIRBYTE_STREAM_2); + + var resultSet1 = cassandraCqlProvider.select(keyspace1, table1); + var resultSet2 = cassandraCqlProvider.select(keyspace2, table2); + + assertThat(resultSet1) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")); + + assertThat(resultSet2) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data4\"}")); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java new file mode 100644 index 0000000000000..b460b6963314a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.time.Instant; +import java.util.List; + +public class TestDataFactory { + + private TestDataFactory() { + + } + + static CassandraConfig createCassandraConfig(String username, String password, String address, int port) { + return new CassandraConfig( + "default_keyspace", + username, + password, + address, + port, + "datacenter1", + 1); + } + + static JsonNode createJsonConfig(String username, String password, String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", username) + .put("password", password) + .put("address", address) + .put("port", port) + .put("datacenter", "datacenter1") + .put("replication", 1) + .build()); + } + + static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type, + String streamName, + String namespace, + JsonNode data) { + return new AirbyteMessage() + .withType(type) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withData(data) + .withEmittedAt(Instant.now().toEpochMilli())); + } + + static AirbyteStream createAirbyteStream(String name, String namespace) { + return new AirbyteStream() + .withName(name) + .withNamespace(namespace); + } + + static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) { + return new ConfiguredAirbyteStream() + 
.withDestinationSyncMode(syncMode) + .withStream(stream); + } + + static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... configuredStreams) { + return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams)); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java new file mode 100644 index 0000000000000..a7249d2814a9a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class CassandraConfigTest { + + private CassandraConfig cassandraConfig; + + @BeforeEach + void setup() { + var jsonNode = TestDataFactory.createJsonConfig( + "usr", + "pw", + "127.0.0.1", + 9042); + this.cassandraConfig = new CassandraConfig(jsonNode); + } + + @Test + void testConfig() { + + assertThat(cassandraConfig) + .hasFieldOrPropertyWithValue("keyspace", "default_keyspace") + .hasFieldOrPropertyWithValue("username", "usr") + .hasFieldOrPropertyWithValue("password", "pw") + .hasFieldOrPropertyWithValue("address", "127.0.0.1") + .hasFieldOrPropertyWithValue("port", 9042) + .hasFieldOrPropertyWithValue("datacenter", "datacenter1") + .hasFieldOrPropertyWithValue("replication", 1); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java new file mode 100644 index 0000000000000..b456ace632f1c --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraNameTransformerTest { + + private CassandraNameTransformer cassandraNameTransformer; + + @BeforeAll + void setup() { + var cassandraConfig = TestDataFactory.createCassandraConfig( + "usr", + "pw", + "127.0.0.1", + 9042); + this.cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Test + void testOutputTable() { + + var table = cassandraNameTransformer.outputTable("stream_name"); + + assertThat(table).matches("airbyte_raw_stream_name"); + + } + + @Test + void testOutputTmpTable() { + + var table = cassandraNameTransformer.outputTmpTable("stream_name"); + + assertThat(table).matches("airbyte_tmp_+[a-z]+_stream_name"); + + } + + @Test + void testOutputKeyspace() { + + var keyspace = cassandraNameTransformer.outputKeyspace("***keyspace^h"); + + assertThat(keyspace).matches("keyspace_h"); + + } + + @Test + void outputColumn() { + + var column = cassandraNameTransformer.outputColumn("_airbyte_data"); + + assertThat(column).matches("\"_airbyte_data\""); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java new file mode 100644 index 0000000000000..b460b6963314a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.time.Instant; +import java.util.List; + +public class TestDataFactory { + + private TestDataFactory() { + + } + + static CassandraConfig createCassandraConfig(String username, String password, String address, int port) { + return new CassandraConfig( + "default_keyspace", + username, + password, + address, + port, + "datacenter1", + 1); + } + + static JsonNode createJsonConfig(String username, String password, String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", username) + .put("password", password) + .put("address", address) + .put("port", port) + .put("datacenter", "datacenter1") + .put("replication", 1) + .build()); + } + + static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type, + String streamName, + String namespace, + JsonNode data) { + return new AirbyteMessage() + .withType(type) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withData(data) + .withEmittedAt(Instant.now().toEpochMilli())); + } + + static AirbyteStream createAirbyteStream(String name, String namespace) { + return new AirbyteStream() + .withName(name) + .withNamespace(namespace); + } + + static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) { + return new ConfiguredAirbyteStream() + .withDestinationSyncMode(syncMode) + .withStream(stream); + } + + static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... 
configuredStreams) {
+    return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams));
+  }
+
+}
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index b8040b0007076..f8737de227ff3 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -162,6 +162,7 @@
 * [Redshift](integrations/destinations/redshift.md)
 * [S3](integrations/destinations/s3.md)
 * [Snowflake](integrations/destinations/snowflake.md)
+ * [Cassandra](integrations/destinations/cassandra.md)
 * [Custom or New Connector](integrations/custom-connectors.md)
 * [Connector Development](connector-development/README.md)
 * [Tutorials](connector-development/tutorials/README.md)
diff --git a/docs/integrations/README.md b/docs/integrations/README.md
index c3c783e0dcbea..e474a0b0d6299 100644
--- a/docs/integrations/README.md
+++ b/docs/integrations/README.md
@@ -147,4 +147,5 @@ Airbyte uses a grading system for connectors to help users understand what to ex
 | [S3](destinations/s3.md) | Certified |
 | [SQL Server \(MSSQL\)](destinations/mssql.md) | Alpha |
 | [Snowflake](destinations/snowflake.md) | Certified |
+| [Cassandra](destinations/cassandra.md) | Alpha |
diff --git a/docs/integrations/destinations/cassandra.md b/docs/integrations/destinations/cassandra.md
new file mode 100644
index 0000000000000..2280daf9da0ab
--- /dev/null
+++ b/docs/integrations/destinations/cassandra.md
@@ -0,0 +1,49 @@
+# Cassandra
+
+## Sync overview
+
+### Output schema
+
+The incoming Airbyte data is structured in keyspaces and tables and is partitioned and replicated across different nodes
+in the cluster. This connector maps an incoming `stream` to a Cassandra `table` and a `namespace` to a
+Cassandra `keyspace`. Fields in the Airbyte message become different columns in the Cassandra tables. Each table will
+contain the following columns (an illustrative sketch of such a table is shown further below).
+
+* `_airbyte_ab_id`: A randomly generated UUID used as the partition key.
+* `_airbyte_emitted_at`: A timestamp representing when the event was received from the data source.
+* `_airbyte_data`: A JSON text field containing the extracted data.
+
+### Features
+
+| Feature | Support | Notes |
+| :--- | :---: | :--- |
+| Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured Cassandra table. |
+| Incremental - Append Sync | ✅ | |
+| Incremental - Deduped History | ❌ | As this connector does not support dbt, we don't support this sync mode on this destination. |
+| Namespaces | ✅ | Namespace will be used as part of the table name. |
+
+
+
+### Performance considerations
+
+Cassandra is designed to handle large amounts of data by distributing write operations across the nodes in the cluster.
+Write throughput scales with the number of nodes, so a cluster with enough nodes can absorb the data volumes produced
+by the connector.
+
+## Getting started
+
+### Requirements
+
+* The driver is compatible with _Cassandra >= 2.1_
+* Configuration
+  * Keyspace [default keyspace to use when writing data]
+  * Username [authentication username]
+  * Password [authentication password]
+  * Address [cluster address]
+  * Port [default: 9042]
+  * Datacenter [optional] [default: datacenter1]
+  * Replication [optional] [default: 1]
+
+### Setup guide
+
+###### TODO: more info, screenshots?, etc...
From 9f750c2d0e7c85b48c473155a075d402d4bbcac4 Mon Sep 17 00:00:00 2001
From: Marcos Marx
Date: Fri, 5 Nov 2021 19:29:07 -0300
Subject: [PATCH 62/83] Publish PR 7186: config files for destination cassandra (#7685)

* add cassandra destination connector
* refactor and docs.
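For illustration only, a raw table with the layout described in the Cassandra doc above could be created through the DataStax Java driver roughly as follows. This is a hedged sketch: the quoted column names follow the name-transformer tests earlier in this patch, but the `RawTableSketch` class, the exact DDL, and the replication settings are assumptions, not the connector's actual implementation.

```java
import com.datastax.oss.driver.api.core.CqlSession;

public class RawTableSketch {

  // Creates a table shaped like the one described in the docs above:
  // a UUID partition key, the raw JSON payload and the emission timestamp.
  public static void createRawTable(CqlSession session, String keyspace, String table) {
    session.execute("CREATE KEYSPACE IF NOT EXISTS " + keyspace
        + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}");
    session.execute("CREATE TABLE IF NOT EXISTS " + keyspace + "." + table + " ("
        + "\"_airbyte_ab_id\" uuid PRIMARY KEY, "
        + "\"_airbyte_data\" text, "
        + "\"_airbyte_emitted_at\" timestamp)");
  }

}
```

In practice the keyspace and table names would first pass through the name transformer, which is why the tests above expect lowercased, sanitized identifiers such as `airbyte_raw_stream_name`.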
* delete test dockerfile * revert Dockerfile rm change * refactor & fix acceptance tests & format * revert stream peek * remove get pip * add address example * improved copy and code refactor * add docker-compose and improved docs * config files Co-authored-by: itaseski Co-authored-by: itaseskii --- .../seed/destination_definitions.yaml | 5 + .../resources/seed/destination_specs.yaml | 65 ++++++++++++ .../src/main/resources/seed/source_specs.yaml | 99 ++++++++++++++++--- 3 files changed, 154 insertions(+), 15 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index f28327960f430..e65178b5d71fb 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -13,6 +13,11 @@ dockerRepository: airbyte/destination-bigquery-denormalized dockerImageTag: 0.1.8 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery +- name: Cassandra + destinationDefinitionId: 707456df-6f4f-4ced-b5c6-03f73bcad1c5 + dockerRepository: airbyte/destination-cassandra + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/destinations/cassandra - name: Chargify (Keen) destinationDefinitionId: 81740ce8-d764-4ea7-94df-16bb41de36ae dockerRepository: airbyte/destination-keen diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index a07a77ba31e3e..9246d56e3ab82 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -326,6 +326,71 @@ supported_destination_sync_modes: - "overwrite" - "append" +- dockerImage: "airbyte/destination-cassandra:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/cassandra" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Cassandra Destination Spec" + type: "object" + required: + - "keyspace" + - "username" + - "password" + - "address" + - "port" + additionalProperties: true + properties: + keyspace: + title: "Keyspace" + description: "Default Cassandra keyspace to create data in." + type: "string" + order: 0 + username: + title: "Username" + description: "Username to use to access Cassandra." + type: "string" + order: 1 + password: + title: "Password" + description: "Password associated with Cassandra." + type: "string" + airbyte_secret: true + order: 2 + address: + title: "Address" + description: "Address to connect to." + type: "string" + examples: + - "localhost,127.0.0.1" + order: 3 + port: + title: "Port" + description: "Port of Cassandra." + type: "integer" + minimum: 0 + maximum: 65536 + default: 9042 + order: 4 + datacenter: + title: "Datacenter" + description: "Datacenter of the cassandra cluster." + type: "string" + default: "datacenter1" + order: 5 + replication: + title: "Replication factor" + type: "integer" + description: "Indicates to how many nodes the data should be replicated\ + \ to." 
+ default: 1 + order: 6 + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" - dockerImage: "airbyte/destination-keen:0.2.0" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/keen" diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index b28322703efe8..cafaf0adb6ad1 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1934,31 +1934,100 @@ oauthFlowOutputParameters: - - "access_token" - - "refresh_token" -- dockerImage: "airbyte/source-google-directory:0.1.6" +- dockerImage: "airbyte/source-google-directory:0.1.8" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-directory" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "Google Directory Spec" type: "object" - required: - - "credentials_json" - - "email" - additionalProperties: false + required: [] + additionalProperties: true properties: - credentials_json: - type: "string" - description: "The contents of the JSON service account key. See the docs for more information on how to generate this key." - airbyte_secret: true - email: - type: "string" - description: "The email of the user, which has permissions to access the\ - \ Google Workspace Admin APIs." + credentials: + title: "Google Credentials" + description: "Google APIs use the OAuth 2.0 protocol for authentication\ + \ and authorization. The Source supports Web server application and Service accounts scenarios" + type: "object" + oneOf: + - title: "Sign in via Google (Oauth)" + description: "For these scenario user only needs to give permission to\ + \ read Google Directory data" + type: "object" + required: + - "client_id" + - "client_secret" + - "refresh_token" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Authentication scenario" + const: "Web server app" + enum: + - "Web server app" + default: "Web server app" + order: 0 + client_id: + title: "Client ID" + type: "string" + description: "The client ID of developer application" + airbyte_secret: true + client_secret: + title: "Client secret" + type: "string" + description: "The client secret of developer application" + airbyte_secret: true + refresh_token: + title: "Refresh Token" + type: "string" + description: "The token for obtaining new access token" + airbyte_secret: true + - title: "Service account Key" + description: "For these scenario user should obtain service account's\ + \ credentials from the Google API Console and provide delegated email" + type: "object" + required: + - "credentials_json" + - "email" + properties: + credentials_title: + type: "string" + title: "Credentials title" + description: "Authentication scenario" + const: "Service accounts" + enum: + - "Service accounts" + default: "Service accounts" + order: 0 + credentials_json: + type: "string" + title: "Credentials JSON" + description: "The contents of the JSON service account key. See the\ + \ docs for more information on how to generate this key." + airbyte_secret: true + email: + type: "string" + title: "Email" + description: "The email of the user, which has permissions to access\ + \ the Google Workspace Admin APIs." 
supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] + authSpecification: + auth_type: "oauth2.0" + oauth2Specification: + rootObject: + - "credentials" + - "0" + oauthFlowInitParameters: + - - "client_id" + - - "client_secret" + oauthFlowOutputParameters: + - - "refresh_token" - dockerImage: "airbyte/source-google-search-console:0.1.6" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" From 2efef3c04d4d652bfa8d8f5dfb1ef69af0abe450 Mon Sep 17 00:00:00 2001 From: Yevhenii <34103125+yevhenii-ldv@users.noreply.github.com> Date: Sat, 6 Nov 2021 00:42:06 +0200 Subject: [PATCH 63/83] =?UTF-8?q?=F0=9F=90=9B=20Source=20Salesforce:=20Fix?= =?UTF-8?q?=20getting=20`anyType`=20fields=20using=20BULK=20API=20(#7592)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Source Salesforce: Fix getting anyType fields using BULK API --- .../b117307c-14b6-41aa-9422-947e34922962.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-salesforce/BOOTSTRAP.md | 13 ++++++++++ .../connectors/source-salesforce/Dockerfile | 2 +- .../configured_catalog_bulk.json | 24 ------------------- .../configured_catalog_rest.json | 24 ------------------- .../source_salesforce/spec.json | 2 +- .../source_salesforce/streams.py | 12 +++------- docs/integrations/sources/salesforce.md | 1 + 9 files changed, 21 insertions(+), 61 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json index 9cf14666e7b14..b0f7d72a5c777 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b117307c-14b6-41aa-9422-947e34922962.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "b117307c-14b6-41aa-9422-947e34922962", "name": "Salesforce", "dockerRepository": "airbyte/source-salesforce", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/sources/salesforce", "icon": "salesforce.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index e3ab0f83124c1..db629e47be54d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -468,7 +468,7 @@ - name: Salesforce sourceDefinitionId: b117307c-14b6-41aa-9422-947e34922962 dockerRepository: airbyte/source-salesforce - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/salesforce icon: salesforce.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md b/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md index d08b9bdb0f83d..943fb5c4e4f93 100644 --- a/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md +++ b/airbyte-integrations/connectors/source-salesforce/BOOTSTRAP.md @@ -10,6 +10,19 @@ There are two types of objects: To query an object, one must use [SOQL](https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm), Salesforce’s proprietary SQL language. 
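As a hedged sketch of the query shape only (the Salesforce connector itself is written in Python, and `Account`, the selected fields, and the `SoqlQuerySketch` helper below are illustrative placeholders rather than the connector's actual code), an incremental SOQL filter on a cursor field can be assembled like this:

```java
public class SoqlQuerySketch {

  // Builds an incremental SOQL query that only returns records whose cursor
  // field is newer than the value saved from the previous sync.
  public static String incrementalQuery(String object, String cursorField, String lastCursorValue) {
    return "SELECT Id, " + cursorField
        + " FROM " + object
        + " WHERE " + cursorField + " > " + lastCursorValue
        + " ORDER BY " + cursorField + " ASC";
  }

  public static void main(String[] args) {
    // Prints: SELECT Id, SystemModstamp FROM Account WHERE SystemModstamp > 2021-11-01T00:00:00Z ORDER BY SystemModstamp ASC
    System.out.println(incrementalQuery("Account", "SystemModstamp", "2021-11-01T00:00:00Z"));
  }

}
```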
An example might be `SELECT * FROM <Object> WHERE SystemModstamp > 2122-01-18T21:18:20.000Z`.
+Because the `Salesforce` connector discovers all objects from `Salesforce` dynamically, all streams are generated dynamically as well.
+While building the schema for each stream, the connector determines whether the stream is dynamic: a stream is considered dynamic if it
+has one of the following fields: `SystemModstamp`, `LastModifiedDate`, `CreatedDate`, `LoginTime`.
+Based on this, streams that carry information about record updates are filtered by `updated at`, while streams that only carry the date a
+record was created (for example, streams that expose only the `CreatedDate` field) are filtered by `created at`.
+The cursor is assigned as follows:
+```
+@property
+def cursor_field(self) -> str:
+    return self.replication_key
+```
+`replication_key` is one of the following values: `SystemModstamp`, `LastModifiedDate`, `CreatedDate`, `LoginTime`.
+
 In addition there are two types of APIs exposed by Salesforce:
 * **[REST API](https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_queryall.htm)**: completely synchronous
 * **[BULK API](https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/queries.htm)**: has larger rate limit allowance (150k objects per day on the standard plan) but is asynchronous and therefore follows a request-poll-wait pattern.
diff --git a/airbyte-integrations/connectors/source-salesforce/Dockerfile b/airbyte-integrations/connectors/source-salesforce/Dockerfile
index 4779533f45117..47ba807177208 100644
--- a/airbyte-integrations/connectors/source-salesforce/Dockerfile
+++ b/airbyte-integrations/connectors/source-salesforce/Dockerfile
@@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-salesforce diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json index 69da9893a876b..0088a9218122b 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_bulk.json @@ -80,30 +80,6 @@ "sync_mode": "incremental", "destination_sync_mode": "append" }, - { - "stream": { - "name": "LoginGeo", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "LoginHistory", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["LoginTime"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, { "stream": { "name": "PermissionSetTabSetting", diff --git a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json index bdf4425c618f5..c1d410e37bf9c 100644 --- a/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json +++ b/airbyte-integrations/connectors/source-salesforce/integration_tests/configured_catalog_rest.json @@ -70,30 +70,6 @@ "sync_mode": "incremental", "destination_sync_mode": "append" }, - { - "stream": { - "name": "LoginGeo", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["SystemModstamp"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "LoginHistory", - "json_schema": {}, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["LoginTime"], - "source_defined_primary_key": [["Id"]] - }, - "sync_mode": "incremental", - "destination_sync_mode": "append" - }, { "stream": { "name": "PermissionSetTabSetting", diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json index a676b2a0674b6..a167a53fc2c4d 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/spec.json @@ -28,7 +28,7 @@ "airbyte_secret": true }, "start_date": { - "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.", + "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated. 
This field uses the \"updated\" field if available, otherwise the \"created\" fields if they are available for a stream.", "type": "string", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "examples": ["2021-07-25T00:00:00Z"] diff --git a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py index b668bf0c965f4..3c5d44d7e115e 100644 --- a/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py +++ b/airbyte-integrations/connectors/source-salesforce/source_salesforce/streams.py @@ -63,7 +63,7 @@ def request_params( selected_properties = { key: value for key, value in selected_properties.items() - if not (("format" in value and value["format"] == "base64") or "object" in value["type"]) + if not (("format" in value and value["format"] == "base64") or ("object" in value["type"] and len(value["type"]) < 3)) } query = f"SELECT {','.join(selected_properties.keys())} FROM {self.name} " @@ -179,13 +179,7 @@ def transform_types(field_types: list = None): """ Convert Jsonschema data types to Python data types. """ - convert_types_map = { - "boolean": bool, - "string": str, - "number": float, - "integer": int, - "object": dict, - } + convert_types_map = {"boolean": bool, "string": str, "number": float, "integer": int, "object": dict, "array": list} return [convert_types_map[field_type] for field_type in field_types if field_type != "null"] for key, value in record.items(): @@ -279,7 +273,7 @@ def request_params( selected_properties = { key: value for key, value in selected_properties.items() - if not (("format" in value and value["format"] == "base64") or "object" in value["type"]) + if not (("format" in value and value["format"] == "base64") or ("object" in value["type"] and len(value["type"]) < 3)) } stream_date = stream_state.get(self.cursor_field) diff --git a/docs/integrations/sources/salesforce.md b/docs/integrations/sources/salesforce.md index 64b27d5c5bf9d..afe7cc996f951 100644 --- a/docs/integrations/sources/salesforce.md +++ b/docs/integrations/sources/salesforce.md @@ -734,6 +734,7 @@ List of available streams: | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.3 | 2021-11-06 | [7592](https://github.com/airbytehq/airbyte/pull/7592) | Fix getting `anyType` fields using BULK API | | 0.1.2 | 2021-09-30 | [6438](https://github.com/airbytehq/airbyte/pull/6438) | Annotate Oauth2 flow initialization parameters in connector specification | | 0.1.1 | 2021-09-21 | [6209](https://github.com/airbytehq/airbyte/pull/6209) | Fix bug with pagination for BULK API | | 0.1.0 | 2021-09-08 | [5619](https://github.com/airbytehq/airbyte/pull/5619) | Salesforce Aitbyte-Native Connector | From 16d9d15f5a4bb06907d53bff381a3df66415bfe7 Mon Sep 17 00:00:00 2001 From: Mario Molina Date: Fri, 5 Nov 2021 17:03:27 -0600 Subject: [PATCH 64/83] =?UTF-8?q?=F0=9F=8E=89=20New=20Destination:=20Pulsa?= =?UTF-8?q?r=20(#7315)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adding Pulsar destination * Add enable_chunking property * Add dependency for DNS resolve for Mac * Updating build.gradle * Adding definition * Refactor and fix tests * Adding new config properties * Reformat code * Close resources when checking connection * Updating doc * Adding more test to build the producer map * Rename brokers property * Avoid emitting null states to the output collector * fix config file * run seed 
source/destination Co-authored-by: Marcos Marx --- .../2340cbba-358e-11ec-8d3d-0242ac130203.json | 7 + .../seed/destination_definitions.yaml | 5 + .../resources/seed/destination_specs.yaml | 148 ++++++++++++ airbyte-integrations/builds.md | 1 + .../destination-pulsar/.dockerignore | 3 + .../connectors/destination-pulsar/Dockerfile | 12 + .../connectors/destination-pulsar/README.md | 68 ++++++ .../destination-pulsar/build.gradle | 25 ++ .../destination/pulsar/PulsarDestination.java | 96 ++++++++ .../pulsar/PulsarDestinationConfig.java | 114 +++++++++ .../pulsar/PulsarRecordConsumer.java | 121 ++++++++++ .../destination/pulsar/PulsarUtils.java | 40 ++++ .../src/main/resources/spec.json | 137 +++++++++++ .../PulsarDestinationAcceptanceTest.java | 170 +++++++++++++ .../pulsar/PulsarRecordConsumerTest.java | 225 ++++++++++++++++++ docs/SUMMARY.md | 1 + docs/integrations/README.md | 1 + docs/integrations/destinations/pulsar.md | 87 +++++++ 18 files changed, 1261 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json create mode 100644 airbyte-integrations/connectors/destination-pulsar/.dockerignore create mode 100644 airbyte-integrations/connectors/destination-pulsar/Dockerfile create mode 100644 airbyte-integrations/connectors/destination-pulsar/README.md create mode 100644 airbyte-integrations/connectors/destination-pulsar/build.gradle create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java create mode 100644 docs/integrations/destinations/pulsar.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json new file mode 100644 index 0000000000000..d82499161c7b8 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/2340cbba-358e-11ec-8d3d-0242ac130203.json @@ -0,0 +1,7 @@ +{ + "destinationDefinitionId": "2340cbba-358e-11ec-8d3d-0242ac130203", + "name": "Pulsar", + "dockerRepository": "airbyte/destination-pulsar", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/pulsar" +} diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index e65178b5d71fb..8e9644a95b98b 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml 
+++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -89,6 +89,11 @@ dockerImageTag: 0.3.11 documentationUrl: https://docs.airbyte.io/integrations/destinations/postgres icon: postgresql.svg +- name: Pulsar + destinationDefinitionId: 2340cbba-358e-11ec-8d3d-0242ac130203 + dockerRepository: airbyte/destination-pulsar + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/destinations/pulsar - name: Redshift destinationDefinitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc dockerRepository: airbyte/destination-redshift diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 9246d56e3ab82..6142ea259b445 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -2248,6 +2248,154 @@ - "overwrite" - "append" - "append_dedup" +- dockerImage: "airbyte/destination-pulsar:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/pulsar" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pulsar Destination Spec" + type: "object" + required: + - "brokers" + - "use_tls" + - "topic_type" + - "topic_tenant" + - "topic_namespace" + - "topic_pattern" + - "compression_type" + - "send_timeout_ms" + - "max_pending_messages" + - "max_pending_messages_across_partitions" + - "batching_enabled" + - "batching_max_messages" + - "batching_max_publish_delay" + - "block_if_queue_full" + additionalProperties: true + properties: + brokers: + title: "Pulsar brokers" + description: "A list of host/port pairs to use for establishing the initial\ + \ connection to the Pulsar cluster." + type: "string" + examples: + - "broker1:6650,broker2:6650" + use_tls: + title: "Use TLS" + description: "Whether to use TLS encryption on the connection." + type: "boolean" + default: false + topic_type: + title: "Topic type" + description: "It identifies type of topic. Pulsar supports two kind of topics:\ + \ persistent and non-persistent. In persistent topic, all messages are\ + \ durably persisted on disk (that means on multiple disks unless the broker\ + \ is standalone), whereas non-persistent topic does not persist message\ + \ into storage disk." + type: "string" + default: "persistent" + enum: + - "persistent" + - "non-persistent" + topic_tenant: + title: "Topic tenant" + description: "The topic tenant within the instance. Tenants are essential\ + \ to multi-tenancy in Pulsar, and spread across clusters." + type: "string" + default: "public" + examples: + - "public" + topic_namespace: + title: "Topic namespace" + description: "The administrative unit of the topic, which acts as a grouping\ + \ mechanism for related topics. Most topic configuration is performed\ + \ at the namespace level. Each tenant has one or multiple namespaces." + type: "string" + default: "default" + examples: + - "default" + topic_pattern: + title: "Topic pattern" + description: "Topic pattern in which the records will be sent. You can use\ + \ patterns like '{namespace}' and/or '{stream}' to send the message to\ + \ a specific topic based on these values. Notice that the topic name will\ + \ be transformed to a standard naming convention." + type: "string" + examples: + - "sample.topic" + - "{namespace}.{stream}.sample" + topic_test: + title: "Test topic" + description: "Topic to test if Airbyte can produce messages." 
+ type: "string" + examples: + - "test.topic" + producer_name: + title: "Producer name" + description: "Name for the producer. If not filled, the system will generate\ + \ a globally unique name which can be accessed with." + type: "string" + examples: + - "airbyte-producer" + producer_sync: + title: "Sync producer" + description: "Wait synchronously until the record has been sent to Pulsar." + type: "boolean" + default: false + compression_type: + title: "Compression type" + description: "Compression type for the producer." + type: "string" + default: "NONE" + enum: + - "NONE" + - "LZ4" + - "ZLIB" + - "ZSTD" + - "SNAPPY" + send_timeout_ms: + title: "Message send timeout" + description: "If a message is not acknowledged by a server before the send-timeout\ + \ expires, an error occurs (in ms)." + type: "integer" + default: 30000 + max_pending_messages: + title: "Max pending messages" + description: "The maximum size of a queue holding pending messages." + type: "integer" + default: 1000 + max_pending_messages_across_partitions: + title: "Max pending messages across partitions" + description: "The maximum number of pending messages across partitions." + type: "integer" + default: 50000 + batching_enabled: + title: "Enable batching" + description: "Control whether automatic batching of messages is enabled\ + \ for the producer." + type: "boolean" + default: true + batching_max_messages: + title: "Batching max messages" + description: "Maximum number of messages permitted in a batch." + type: "integer" + default: 1000 + batching_max_publish_delay: + title: "Batching max publish delay" + description: " Time period in milliseconds within which the messages sent\ + \ will be batched." + type: "integer" + default: 1 + block_if_queue_full: + title: "Block if queue is full" + description: "If the send operation should block when the outgoing message\ + \ queue is full." 
+ type: "boolean" + default: false + supportsIncremental: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: + - "append" - dockerImage: "airbyte/destination-redshift:0.3.19" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index c9d1010865a9a..31f450fc9517b 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -109,6 +109,7 @@ | Local JSON | [![destination-local-json](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-local-json%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-local-json) | | Mongo DB | [![destination-mongodb](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-mongodb%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-mongodb) | | Postgres | [![destination-postgres](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-postgres%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-postgres) | +| Pulsar | [![destination-pulsar](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-pulsar%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-pulsar) | | Redshift | [![destination-redshift](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-redshift%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-redshift) | | S3 | [![destination-s3](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-s3%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-s3) | | Snowflake | [![destination-snowflake](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-snowflake%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-snowflake) | diff --git a/airbyte-integrations/connectors/destination-pulsar/.dockerignore b/airbyte-integrations/connectors/destination-pulsar/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-pulsar/Dockerfile b/airbyte-integrations/connectors/destination-pulsar/Dockerfile new file mode 100644 index 0000000000000..c5ffa3415f27f --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/Dockerfile @@ -0,0 +1,12 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte + +ENV APPLICATION destination-pulsar + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-pulsar diff --git a/airbyte-integrations/connectors/destination-pulsar/README.md b/airbyte-integrations/connectors/destination-pulsar/README.md new file mode 100644 index 0000000000000..a291e2c6680d5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/README.md @@ -0,0 +1,68 @@ +# Destination Pulsar + +This is the repository for the Pulsar destination connector in Java. 
+For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/pulsar). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-pulsar:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-pulsar:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-pulsar:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-pulsar:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/pulsar`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/PulsarDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-pulsar:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
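For reference, a minimal `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json` might look like the sketch below. All values are illustrative placeholders drawn from the spec's examples and defaults (they are assumptions, not real connection details); replace them with the values for your own Pulsar cluster:
```
{
  "brokers": "broker1:6650,broker2:6650",
  "use_tls": false,
  "topic_type": "persistent",
  "topic_tenant": "public",
  "topic_namespace": "default",
  "topic_pattern": "{namespace}.{stream}.sample",
  "producer_name": "airbyte-producer",
  "producer_sync": false,
  "compression_type": "NONE",
  "send_timeout_ms": 30000,
  "max_pending_messages": 1000,
  "max_pending_messages_across_partitions": 50000,
  "batching_enabled": true,
  "batching_max_messages": 1000,
  "batching_max_publish_delay": 1,
  "block_if_queue_full": false
}
```
With a file like this in `secrets/`, the connector commands shown above (e.g. `check`) can be run against your cluster.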
diff --git a/airbyte-integrations/connectors/destination-pulsar/build.gradle b/airbyte-integrations/connectors/destination-pulsar/build.gradle new file mode 100644 index 0000000000000..835f9dfaaa9e4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/build.gradle @@ -0,0 +1,25 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.pulsar.PulsarDestination' + applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:bases:base-java') + + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + implementation 'org.apache.pulsar:pulsar-client:2.8.1' + + testImplementation "org.testcontainers:pulsar:1.16.2" + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-pulsar') +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java new file mode 100644 index 0000000000000..5b00b99d34f88 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.StandardNameTransformer; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.apache.pulsar.client.api.MessageId; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PulsarDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(PulsarDestination.class); + + public static final String COLUMN_NAME_AB_ID = JavaBaseConstants.COLUMN_NAME_AB_ID; + public static final String COLUMN_NAME_EMITTED_AT = JavaBaseConstants.COLUMN_NAME_EMITTED_AT; + public static final String COLUMN_NAME_DATA = JavaBaseConstants.COLUMN_NAME_DATA; + public static final String COLUMN_NAME_STREAM = "_airbyte_stream"; + + private final StandardNameTransformer namingResolver; + + public PulsarDestination() { + this.namingResolver = new StandardNameTransformer(); + } + + @Override + public 
AirbyteConnectionStatus check(final JsonNode config) { + try { + final PulsarDestinationConfig pulsarConfig = PulsarDestinationConfig.getPulsarDestinationConfig(config); + final String testTopic = pulsarConfig.getTestTopic(); + if (!testTopic.isBlank()) { + final String key = UUID.randomUUID().toString(); + final GenericRecord value = Schema.generic(PulsarDestinationConfig.getSchemaInfo()) + .newRecordBuilder() + .set(PulsarDestination.COLUMN_NAME_AB_ID, key) + .set(PulsarDestination.COLUMN_NAME_STREAM, "test-topic-stream") + .set(PulsarDestination.COLUMN_NAME_EMITTED_AT, System.currentTimeMillis()) + .set(PulsarDestination.COLUMN_NAME_DATA, Jsons.jsonNode(ImmutableMap.of("test-key", "test-value"))) + .build(); + + try (final PulsarClient client = PulsarUtils.buildClient(pulsarConfig.getServiceUrl()); + final Producer producer = PulsarUtils.buildProducer(client, Schema.generic(PulsarDestinationConfig.getSchemaInfo()), + pulsarConfig.getProducerConfig(), pulsarConfig.uriForTopic(testTopic))) { + final MessageId messageId = producer.send(value); + + producer.flush(); + + LOGGER.info("Successfully sent message id '{}' to Pulsar brokers for topic '{}'.", messageId, testTopic); + } + } + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception attempting to connect to the Pulsar brokers: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect to the Pulsar brokers with provided configuration. \n" + e.getMessage()); + } + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + return new PulsarRecordConsumer(PulsarDestinationConfig.getPulsarDestinationConfig(config), + catalog, + outputRecordCollector, + namingResolver); + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new PulsarDestination(); + LOGGER.info("Starting destination: {}", PulsarDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("Completed destination: {}", PulsarDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java new file mode 100644 index 0000000000000..c67056a1f35b7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.pulsar.client.api.CompressionType; +import org.apache.pulsar.client.api.schema.RecordSchemaBuilder; +import org.apache.pulsar.client.api.schema.SchemaBuilder; +import org.apache.pulsar.common.schema.SchemaInfo; +import org.apache.pulsar.common.schema.SchemaType; + +public class PulsarDestinationConfig { + + private final String serviceUrl; + private final String topicPattern; + private final String topicPrefix; + private final String testTopic; + private final Map producerConfig; + private final boolean sync; + + private PulsarDestinationConfig(final JsonNode config) { + this.serviceUrl = buildServiceUrl(config); + this.topicPattern = buildTopicPattern(config); + this.topicPrefix = buildTopicPrefix(config); + this.testTopic = buildTestTopic(config); + this.producerConfig = buildProducerConfig(config); + this.sync = isSyncProducer(config); + } + + public static PulsarDestinationConfig getPulsarDestinationConfig(final JsonNode config) { + return new PulsarDestinationConfig(config); + } + + public Map getProducerConfig() { + return producerConfig; + } + + public String getServiceUrl() { + return serviceUrl; + } + + public static SchemaInfo getSchemaInfo() { + RecordSchemaBuilder recordSchemaBuilder = SchemaBuilder.record("airbyte"); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_AB_ID).type(SchemaType.STRING).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_STREAM).type(SchemaType.STRING).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_EMITTED_AT).type(SchemaType.TIMESTAMP).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_DATA).type(SchemaType.BYTES).required(); + + return recordSchemaBuilder.build(SchemaType.JSON); + } + + public String uriForTopic(final String topic) { + return topicPrefix + topic; + } + + public String getTestTopic() { + return testTopic; + } + + public String getTopicPattern() { + return topicPattern; + } + + public boolean isSync() { + return sync; + } + + private String buildServiceUrl(final JsonNode config) { + return String.format("pulsar%s://%s", + config.get("use_tls").asBoolean() ? "+ssl" : "", + config.get("brokers").asText()); + } + + private String buildTestTopic(final JsonNode config) { + return config.has("test_topic") ? 
config.get("test_topic").asText() : ""; + } + + private String buildTopicPattern(final JsonNode config) { + return config.get("topic_pattern").asText(); + } + + private String buildTopicPrefix(final JsonNode config) { + return String.format("%s://%s/%s/", + config.get("topic_type").asText(), + config.get("topic_tenant").asText(), + config.get("topic_namespace").asText()); + } + + private Map buildProducerConfig(final JsonNode config) { + final ImmutableMap.Builder conf = ImmutableMap.builder(); + if (config.has("producer_name")) { + conf.put("producerName", config.get("producer_name").asText()); + } + conf.put("compressionType", CompressionType.valueOf(config.get("compression_type").asText())); + conf.put("sendTimeoutMs", config.get("send_timeout_ms").asInt()); + conf.put("maxPendingMessages", config.get("max_pending_messages").asInt()); + conf.put("maxPendingMessagesAcrossPartitions", config.get("max_pending_messages_across_partitions").asInt()); + conf.put("batchingEnabled", config.get("batching_enabled").asBoolean()); + conf.put("batchingMaxMessages", config.get("batching_max_messages").asInt()); + conf.put("batchingMaxPublishDelayMicros", config.get("batching_max_publish_delay").asInt() * 1000); + conf.put("blockIfQueueFull", config.get("block_if_queue_full").asBoolean()); + + return conf.build(); + } + + private boolean isSyncProducer(final JsonNode config) { + return config.has("producer_sync") && config.get("producer_sync").asBoolean(); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java new file mode 100644 index 0000000000000..c22ac5c056c5b --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PulsarRecordConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(PulsarRecordConsumer.class); + + private final PulsarDestinationConfig config; + private final Map> producerMap; + private final ConfiguredAirbyteCatalog catalog; + private final Consumer outputRecordCollector; + private final NamingConventionTransformer nameTransformer; + private final PulsarClient client; + + private AirbyteMessage lastStateMessage = null; + + public PulsarRecordConsumer(final PulsarDestinationConfig pulsarDestinationConfig, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector, + final NamingConventionTransformer nameTransformer) { + this.config = pulsarDestinationConfig; + this.producerMap = new HashMap<>(); + this.catalog = catalog; + this.outputRecordCollector = outputRecordCollector; + this.nameTransformer = nameTransformer; + this.client = PulsarUtils.buildClient(this.config.getServiceUrl()); + } + + @Override + protected void startTracked() { + producerMap.putAll(buildProducerMap()); + } + + @Override + protected void acceptTracked(final AirbyteMessage airbyteMessage) { + if (airbyteMessage.getType() == AirbyteMessage.Type.STATE) { + lastStateMessage = airbyteMessage; + } else if (airbyteMessage.getType() == AirbyteMessage.Type.RECORD) { + final AirbyteRecordMessage recordMessage = airbyteMessage.getRecord(); + final Producer producer = producerMap.get(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage)); + final String key = UUID.randomUUID().toString(); + final GenericRecord value = Schema.generic(PulsarDestinationConfig.getSchemaInfo()) + .newRecordBuilder() + .set(PulsarDestination.COLUMN_NAME_AB_ID, key) + .set(PulsarDestination.COLUMN_NAME_STREAM, recordMessage.getStream()) + .set(PulsarDestination.COLUMN_NAME_EMITTED_AT, recordMessage.getEmittedAt()) + .set(PulsarDestination.COLUMN_NAME_DATA, recordMessage.getData().toString().getBytes()) + .build(); + + sendRecord(producer, value); + } else { + LOGGER.warn("Unexpected message: " + airbyteMessage.getType()); + } + } + + Map> buildProducerMap() { + return catalog.getStreams().stream() + .map(stream -> AirbyteStreamNameNamespacePair.fromAirbyteSteam(stream.getStream())) + .collect(Collectors.toMap(Function.identity(), pair -> { + String topic = nameTransformer.getIdentifier(config.getTopicPattern() + .replaceAll("\\{namespace}", Optional.ofNullable(pair.getNamespace()).orElse("")) + .replaceAll("\\{stream}", 
Optional.ofNullable(pair.getName()).orElse(""))); + return PulsarUtils.buildProducer(client, Schema.generic(PulsarDestinationConfig.getSchemaInfo()), config.getProducerConfig(), + config.uriForTopic(topic)); + }, (existing, newValue) -> existing)); + } + + private void sendRecord(final Producer producer, final GenericRecord record) { + producer.sendAsync(record); + if (config.isSync()) { + try { + producer.flush(); + } catch (PulsarClientException e) { + LOGGER.error("Error sending message to topic.", e); + throw new RuntimeException("Cannot send message to Pulsar. Error: " + e.getMessage(), e); + } + if (lastStateMessage != null) { + outputRecordCollector.accept(lastStateMessage); + } + } + } + + @Override + protected void close(final boolean hasFailed) { + producerMap.values().forEach(producer -> { + Exceptions.swallow(producer::flush); + Exceptions.swallow(producer::close); + }); + Exceptions.swallow(client::close); + + if (lastStateMessage != null) { + outputRecordCollector.accept(lastStateMessage); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java new file mode 100644 index 0000000000000..fed932ee41818 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import java.util.Map; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; + +class PulsarUtils { + + static PulsarClient buildClient(final String serviceUrl) { + try { + return PulsarClient.builder() + .serviceUrl(serviceUrl) + .build(); + } catch (PulsarClientException e) { + throw new RuntimeException("Error creating the Pulsar client", e); + } + } + + static Producer buildProducer(final PulsarClient client, + final Schema schema, + final Map config, + final String topic) { + try { + return client.newProducer(schema) + .loadConf(config) + .topic(topic) + .create(); + } catch (PulsarClientException e) { + throw new RuntimeException("Error creating the Pulsar producer", e); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json new file mode 100644 index 0000000000000..7dc40a064f490 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json @@ -0,0 +1,137 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/pulsar", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Pulsar Destination Spec", + "type": "object", + "required": [ + "brokers", + "use_tls", + "topic_type", + "topic_tenant", + "topic_namespace", + "topic_pattern", + "compression_type", + "send_timeout_ms", + "max_pending_messages", + "max_pending_messages_across_partitions", + "batching_enabled", + "batching_max_messages", + 
"batching_max_publish_delay", + "block_if_queue_full" + ], + "additionalProperties": true, + "properties": { + "brokers": { + "title": "Pulsar brokers", + "description": "A list of host/port pairs to use for establishing the initial connection to the Pulsar cluster.", + "type": "string", + "examples": ["broker1:6650,broker2:6650"] + }, + "use_tls": { + "title": "Use TLS", + "description": "Whether to use TLS encryption on the connection.", + "type": "boolean", + "default": false + }, + "topic_type": { + "title": "Topic type", + "description": "It identifies type of topic. Pulsar supports two kind of topics: persistent and non-persistent. In persistent topic, all messages are durably persisted on disk (that means on multiple disks unless the broker is standalone), whereas non-persistent topic does not persist message into storage disk.", + "type": "string", + "default": "persistent", + "enum": ["persistent", "non-persistent"] + }, + "topic_tenant": { + "title": "Topic tenant", + "description": "The topic tenant within the instance. Tenants are essential to multi-tenancy in Pulsar, and spread across clusters.", + "type": "string", + "default": "public", + "examples": ["public"] + }, + "topic_namespace": { + "title": "Topic namespace", + "description": "The administrative unit of the topic, which acts as a grouping mechanism for related topics. Most topic configuration is performed at the namespace level. Each tenant has one or multiple namespaces.", + "type": "string", + "default": "default", + "examples": ["default"] + }, + "topic_pattern": { + "title": "Topic pattern", + "description": "Topic pattern in which the records will be sent. You can use patterns like '{namespace}' and/or '{stream}' to send the message to a specific topic based on these values. Notice that the topic name will be transformed to a standard naming convention.", + "type": "string", + "examples": ["sample.topic", "{namespace}.{stream}.sample"] + }, + "topic_test": { + "title": "Test topic", + "description": "Topic to test if Airbyte can produce messages.", + "type": "string", + "examples": ["test.topic"] + }, + "producer_name": { + "title": "Producer name", + "description": "Name for the producer. 
If not filled, the system will generate a globally unique name which can be accessed with.", + "type": "string", + "examples": ["airbyte-producer"] + }, + "producer_sync": { + "title": "Sync producer", + "description": "Wait synchronously until the record has been sent to Pulsar.", + "type": "boolean", + "default": false + }, + "compression_type": { + "title": "Compression type", + "description": "Compression type for the producer.", + "type": "string", + "default": "NONE", + "enum": ["NONE", "LZ4", "ZLIB", "ZSTD", "SNAPPY"] + }, + "send_timeout_ms": { + "title": "Message send timeout", + "description": "If a message is not acknowledged by a server before the send-timeout expires, an error occurs (in ms).", + "type": "integer", + "default": 30000 + }, + "max_pending_messages": { + "title": "Max pending messages", + "description": "The maximum size of a queue holding pending messages.", + "type": "integer", + "default": 1000 + }, + "max_pending_messages_across_partitions": { + "title": "Max pending messages across partitions", + "description": "The maximum number of pending messages across partitions.", + "type": "integer", + "default": 50000 + }, + "batching_enabled": { + "title": "Enable batching", + "description": "Control whether automatic batching of messages is enabled for the producer.", + "type": "boolean", + "default": true + }, + "batching_max_messages": { + "title": "Batching max messages", + "description": "Maximum number of messages permitted in a batch.", + "type": "integer", + "default": 1000 + }, + "batching_max_publish_delay": { + "title": "Batching max publish delay", + "description": " Time period in milliseconds within which the messages sent will be batched.", + "type": "integer", + "default": 1 + }, + "block_if_queue_full": { + "title": "Block if queue is full", + "description": "If the send operation should block when the outgoing message queue is full.", + "type": "boolean", + "default": false + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..26dae59de485c --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.StandardNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.io.IOException; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.pulsar.client.api.Consumer; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.SubscriptionInitialPosition; +import org.apache.pulsar.client.api.SubscriptionType; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.testcontainers.containers.PulsarContainer; +import org.testcontainers.utility.DockerImageName; + +public class PulsarDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final String TOPIC_NAME = "test.topic"; + private static final ObjectReader READER = new ObjectMapper().reader(); + + private static PulsarContainer PULSAR; + + private final NamingConventionTransformer namingResolver = new StandardNameTransformer(); + + @Override + protected String getImageName() { + return "airbyte/destination-pulsar:dev"; + } + + @Override + protected JsonNode getConfig() throws UnknownHostException { + String brokers = Stream.concat(getIpAddresses().stream(), Stream.of("localhost")) + .map(ip -> ip + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .collect(Collectors.joining(",")); + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", brokers) + .put("use_tls", false) + .put("topic_type", "persistent") + .put("topic_tenant", "public") + .put("topic_namespace", "default") + .put("topic_pattern", "{namespace}.{stream}." + TOPIC_NAME) + .put("producer_name", "test-producer-" + UUID.randomUUID()) + .put("producer_sync", true) + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("batching_enabled", false) + .put("batching_max_messages", 1000) + .put("batching_max_publish_delay", 1) + .put("block_if_queue_full", true) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", PULSAR.getHost() + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .put("use_tls", false) + .put("topic_pattern", "{namespace}.{stream}." 
+ TOPIC_NAME) + .put("producer_sync", true) + .put("producer_name", "test-producer") + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("block_if_queue_full", true) + .build()); + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + return ""; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) + throws IOException { + return retrieveRecords(testEnv, streamName, namespace, null); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws IOException { + final PulsarClient client = PulsarClient.builder() + .serviceUrl(PULSAR.getPulsarBrokerUrl()) + .build(); + final String topic = namingResolver.getIdentifier(namespace + "." + streamName + "." + TOPIC_NAME); + final Consumer consumer = client.newConsumer(Schema.AUTO_CONSUME()) + .topic(topic) + .subscriptionName("test-subscription-" + UUID.randomUUID()) + .enableRetry(true) + .subscriptionType(SubscriptionType.Exclusive) + .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest) + .subscribe(); + + final List records = new ArrayList<>(); + while (!consumer.hasReachedEndOfTopic()) { + Message message = consumer.receive(5, TimeUnit.SECONDS); + if (message == null) { + break; + } + records.add(READER.readTree(Base64.getDecoder().decode(message.getValue().getField(PulsarDestination.COLUMN_NAME_DATA).toString()))); + Exceptions.swallow(() -> consumer.acknowledge(message)); + } + consumer.unsubscribe(); + consumer.close(); + client.close(); + + return records; + } + + @SuppressWarnings("UnstableApiUsage") + private List getIpAddresses() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .collect(Collectors.toList()); + } catch (SocketException e) { + return Collections.singletonList(InetAddress.getLocalHost().getHostAddress()); + } + } + + @Override + protected void setup(final TestDestinationEnv testEnv) { + PULSAR = new PulsarContainer(DockerImageName.parse("apachepulsar/pulsar:2.8.1")); + PULSAR.start(); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + PULSAR.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java b/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java new file mode 100644 index 0000000000000..f61c8c4d05dc7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.destination.StandardNameTransformer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; +import org.testcontainers.containers.PulsarContainer; +import org.testcontainers.utility.DockerImageName; + +@DisplayName("PulsarRecordConsumer") +public class PulsarRecordConsumerTest { + + private static final StandardNameTransformer NAMING_RESOLVER = new StandardNameTransformer(); + + private static PulsarContainer PULSAR; + + @ParameterizedTest + @ArgumentsSource(TopicMapArgumentsProvider.class) + @SuppressWarnings("unchecked") + public void testBuildProducerMap(final ConfiguredAirbyteCatalog catalog, + final String streamName, + final String namespace, + final String topicPattern, + final String expectedTopic) + throws UnknownHostException { + String brokers = Stream.concat(getIpAddresses().stream(), Stream.of("localhost")) + .map(ip -> ip + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .collect(Collectors.joining(",")); + final PulsarDestinationConfig config = PulsarDestinationConfig + .getPulsarDestinationConfig(getConfig(brokers, topicPattern)); + + final PulsarRecordConsumer recordConsumer = new PulsarRecordConsumer(config, catalog, mock(Consumer.class), NAMING_RESOLVER); + final Map> producerMap = recordConsumer.buildProducerMap(); + assertEquals(Sets.newHashSet(catalog.getStreams()).size(), producerMap.size()); + + final AirbyteStreamNameNamespacePair streamNameNamespacePair = new AirbyteStreamNameNamespacePair(streamName, namespace); + 
assertEquals(expectedTopic, producerMap.get(streamNameNamespacePair).getTopic()); + } + + @Test + @SuppressWarnings("unchecked") + void testCannotConnectToBrokers() throws Exception { + final PulsarDestinationConfig config = PulsarDestinationConfig + .getPulsarDestinationConfig(getConfig(PULSAR.getHost() + ":" + (PULSAR.getMappedPort(PulsarContainer.BROKER_PORT) + 10), "test-topic")); + + final String streamName = "test-stream"; + final String namespace = "test-schema"; + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + streamName, + namespace, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)))); + final PulsarRecordConsumer consumer = new PulsarRecordConsumer(config, catalog, mock(Consumer.class), NAMING_RESOLVER); + final List expectedRecords = getNRecords(10, streamName, namespace); + + assertThrows(RuntimeException.class, consumer::start); + + expectedRecords.forEach(m -> assertThrows(RuntimeException.class, () -> consumer.accept(m))); + + consumer.accept(new AirbyteMessage() + .withType(AirbyteMessage.Type.STATE) + .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(namespace + "." + streamName, 0))))); + consumer.close(); + } + + private JsonNode getConfig(final String brokers, final String topic) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", brokers) + .put("use_tls", false) + .put("topic_type", "non-persistent") + .put("topic_tenant", "public") + .put("topic_namespace", "default") + .put("topic_pattern", topic) + .put("producer_sync", true) + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("batching_enabled", true) + .put("batching_max_messages", 1000) + .put("batching_max_publish_delay", 1) + .put("block_if_queue_full", true) + .build()); + } + + private List getNRecords(final int n, final String streamName, final String namespace) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); + + } + + @SuppressWarnings("UnstableApiUsage") + private List getIpAddresses() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .collect(Collectors.toList()); + } catch (SocketException e) { + return Collections.singletonList(InetAddress.getLocalHost().getHostAddress()); + } + } + + public static class TopicMapArgumentsProvider implements ArgumentsProvider { + + private static final String TOPIC_NAME = "test.topic"; + private static final String SCHEMA_NAME1 = "public"; + private static final String STREAM_NAME1 = "id_and_name"; + private static final String SCHEMA_NAME2 = SCHEMA_NAME1 + 2; + private static final String STREAM_NAME2 = STREAM_NAME1 + 2; + + private final ConfiguredAirbyteStream stream1 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME1, + STREAM_NAME1, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)); + 
private final ConfiguredAirbyteStream stream2 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME2, + STREAM_NAME2, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)); + + @Override + public Stream provideArguments(final ExtensionContext context) { + final String prefix = "non-persistent://public/default/"; + + final List catalogs = new ArrayList<>(); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream2))); + + return catalogs.stream() + .flatMap(catalog -> catalog.getStreams().stream() + .map(stream -> buildArgs(catalog, stream.getStream(), prefix)) + .flatMap(Collection::stream)); + } + + private List buildArgs(final ConfiguredAirbyteCatalog catalog, final AirbyteStream stream, final String prefix) { + final String transformedTopic = NAMING_RESOLVER.getIdentifier(TOPIC_NAME); + final String transformedName = NAMING_RESOLVER.getIdentifier(stream.getName()); + final String transformedNamespace = NAMING_RESOLVER.getIdentifier(stream.getNamespace()); + + return ImmutableList.of( + Arguments.of(catalog, stream.getName(), stream.getNamespace(), TOPIC_NAME, prefix + "test_topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "test-topic", prefix + "test_topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}", prefix + transformedNamespace), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{stream}", prefix + transformedName), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}.{stream}." + TOPIC_NAME, + prefix + transformedNamespace + "_" + transformedName + "_" + transformedTopic), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}-{stream}-" + TOPIC_NAME, + prefix + transformedNamespace + "_" + transformedName + "_" + transformedTopic), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "topic with spaces", prefix + "topic_with_spaces")); + } + + } + + @BeforeEach + void setup() { + PULSAR = new PulsarContainer(DockerImageName.parse("apachepulsar/pulsar:2.8.1")); + PULSAR.start(); + } + + @AfterEach + void tearDown() { + PULSAR.close(); + } + +} diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index f8737de227ff3..9ad3de71305a3 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -159,6 +159,7 @@ * [MySQL](integrations/destinations/mysql.md) * [Oracle DB](integrations/destinations/oracle.md) * [Postgres](integrations/destinations/postgres.md) + * [Pulsar](integrations/destinations/pulsar.md) * [Redshift](integrations/destinations/redshift.md) * [S3](integrations/destinations/s3.md) * [Snowflake](integrations/destinations/snowflake.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index e474a0b0d6299..51a7d00939997 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -143,6 +143,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [MySQL](destinations/mysql.md) | Beta | | [Oracle](destinations/oracle.md) | Alpha | | [Postgres](destinations/postgres.md) | Certified | +| [Pulsar](destinations/pulsar.md) | Alpha | | [Redshift](destinations/redshift.md) | Certified | | [S3](destinations/s3.md) | Certified | | [SQL Server \(MSSQL\)](destinations/mssql.md) | Alpha | diff --git a/docs/integrations/destinations/pulsar.md 
b/docs/integrations/destinations/pulsar.md new file mode 100644 index 0000000000000..c6279745c7ecd --- /dev/null +++ b/docs/integrations/destinations/pulsar.md @@ -0,0 +1,87 @@ +# Pulsar + +## Overview + +The Airbyte Pulsar destination allows you to sync data to Pulsar. Each stream is written to the corresponding Pulsar topic. + +### Sync overview + +#### Output schema + +Each stream will be output into a Pulsar topic. + +Currently, this connector only writes data in JSON format. More formats \(e.g. Apache Avro\) will be supported in the future. + +Each record will contain in its key the UUID assigned by Airbyte, and in its value these 4 fields: + +* `_airbyte_ab_id`: a UUID assigned by Airbyte to each event that is processed. +* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. +* `_airbyte_data`: a JSON blob with the event data, encoded in Base64. +* `_airbyte_stream`: the name of each record's stream. + +#### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | :--- | :--- | +| Full Refresh Sync | No | | +| Incremental - Append Sync | Yes | | +| Incremental - Deduped History | No | As this connector does not support dbt, we don't support this sync mode on this destination. | +| Namespaces | Yes | | + +## Getting started + +### Requirements + +To use the Pulsar destination, you'll need: + +* A Pulsar cluster 2.8 or above. + +### Setup guide + +#### Network Access + +Make sure your Pulsar brokers can be accessed by Airbyte. + +#### **Permissions** + +Airbyte should be allowed to write messages into topics, and these topics should be created before writing into Pulsar. Alternatively, you can enable the broker configuration `allowAutoTopicCreation` \(which is not recommended for production environments\). + +Note that if you choose to use dynamic topic names, you will probably need to enable `allowAutoTopicCreation` to avoid your connection failing if there was an update to the source connector's schema. Otherwise, a hardcoded topic name may be best. + +Also, note that messages will be sent to topics based on the configured Pulsar `topic_tenant` and `topic_namespace` configs, together with their `topic_type`. + +#### Target topics + +You can determine the topics to which messages are written via the `topic_pattern` configuration parameter in its corresponding Pulsar `topic_tenant`-`topic_namespace`. Messages can be written to either a hardcoded, pre-defined topic, or dynamically written to different topics based on the [namespace](https://docs.airbyte.io/understanding-airbyte/namespaces) or stream they came from. + +To write all messages to a single hardcoded topic, enter its name in the `topic_pattern` field, e.g. setting `topic_pattern` to `my-topic-name` will write all messages from all streams and namespaces to that topic. + +To define the output topics dynamically, you can leverage the `{namespace}` and `{stream}` pattern variables, which cause messages to be written to different topics based on the values present when producing the records. For example, setting the `topic_pattern` parameter to `airbyte_syncs/{namespace}/{stream}` means that messages from namespace `n1` and stream `s1` will get written to the topic `airbyte_syncs/n1/s1`, and messages from `s2` to `airbyte_syncs/n1/s2`, etc. + +If you define output topics dynamically, you might want to enable `allowAutoTopicCreation` to avoid your connection failing if there was an update to the source connector's schema.
Otherwise, you'll need to manually create topics in Pulsar as they are added/updated in the source, which is the recommended option for production environments. + +**NOTICE**: a naming convention transformation will be applied to the target topic name using the `StandardNameTransformer` so that some special characters will be replaced. + +### Setup the Pulsar destination in Airbyte + +You should now have all the requirements needed to configure Pulsar as a destination in the UI. You can configure the following parameters on the Pulsar destination \(though many of these are optional or have default values\): + +* **Pulsar brokers** +* **Use TLS** +* **Topic type** +* **Topic tenant** +* **Topic namespace** +* **Topic pattern** +* **Test topic** +* **Producer name** +* **Sync producer** +* **Compression type** +* **Message send timeout** +* **Max pending messages** +* **Max pending messages across partitions** +* **Enable batching** +* **Batching max messages** +* **Batching max publish delay** +* **Block if queue is full** + +More info about this can be found in the [Pulsar producer configs documentation site](https://pulsar.apache.org/docs/en/client-libraries-java/#producer). From 45f6559c79b5b77ecaca3282889260ce43613787 Mon Sep 17 00:00:00 2001 From: Harsha Teja Kanna Date: Fri, 5 Nov 2021 18:14:36 -0500 Subject: [PATCH 65/83] Fixed Mac M1 build (#7687) --- airbyte-config/init/Dockerfile | 3 ++- airbyte-workers/Dockerfile | 4 ++-- build.gradle | 4 ++++ docs/contributing-to-airbyte/developing-locally.md | 2 ++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/airbyte-config/init/Dockerfile b/airbyte-config/init/Dockerfile index 3144a772ef4ef..c62cb080d822f 100644 --- a/airbyte-config/init/Dockerfile +++ b/airbyte-config/init/Dockerfile @@ -1,4 +1,5 @@ -FROM alpine:3.4 AS seed +ARG ALPINE_IMAGE=alpine:3.4 +FROM ${ALPINE_IMAGE} AS seed WORKDIR /app diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 92301b8816437..3e06d45c6bd92 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -1,7 +1,7 @@ ARG JDK_VERSION=14.0.2 FROM openjdk:${JDK_VERSION}-slim AS worker -ARG ARCH=amd64 +ARG DOCKER_BUILD_ARCH=amd64 # Install Docker to launch worker images. Eventually should be replaced with Docker-java. 
# See https://gitter.im/docker-java/docker-java?at=5f3eb87ba8c1780176603f4e for more information on why we are not currently using Docker-java @@ -13,7 +13,7 @@ RUN apt-get update && apt-get install -y \ software-properties-common RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - RUN add-apt-repository \ - "deb [arch=${ARCH}] https://download.docker.com/linux/debian \ + "deb [arch=${DOCKER_BUILD_ARCH}] https://download.docker.com/linux/debian \ $(lsb_release -cs) \ stable" RUN apt-get update && apt-get install -y docker-ce-cli jq diff --git a/build.gradle b/build.gradle index 39b0b9e2f22d9..03433bf46f525 100644 --- a/build.gradle +++ b/build.gradle @@ -130,6 +130,8 @@ def Task getDockerBuildTask(String artifactName, String projectDir) { return task ("buildDockerImage-$artifactName" (type: DockerBuildImage) { def buildTag = System.getenv('VERSION') ?: 'dev' def buildPlatform = System.getenv('DOCKER_BUILD_PLATFORM') ?: 'linux/amd64' + def alpineImage = System.getenv('ALPINE_IMAGE') ?: 'alpine:3.4' + def postgresImage = System.getenv('POSTGRES_IMAGE') ?: 'postgres:13-alpine' def jdkVersion = System.getenv('JDK_VERSION') ?: '14.0.2' def buildArch = System.getenv('DOCKER_BUILD_ARCH') ?: 'amd64' @@ -138,6 +140,8 @@ def Task getDockerBuildTask(String artifactName, String projectDir) { images.add("airbyte/$artifactName:$buildTag") buildArgs.put('JDK_VERSION', jdkVersion) buildArgs.put('DOCKER_BUILD_ARCH', buildArch) + buildArgs.put('ALPINE_IMAGE', alpineImage) + buildArgs.put('POSTGRES_IMAGE', postgresImage) }) } diff --git a/docs/contributing-to-airbyte/developing-locally.md b/docs/contributing-to-airbyte/developing-locally.md index bfaa11d6283c2..ab7e4329e75ac 100644 --- a/docs/contributing-to-airbyte/developing-locally.md +++ b/docs/contributing-to-airbyte/developing-locally.md @@ -40,6 +40,8 @@ some additional environment variables: ```bash export DOCKER_BUILD_PLATFORM=linux/arm64 export DOCKER_BUILD_ARCH=arm64 +export ALPINE_IMAGE=arm64v8/alpine:3.14 +export POSTGRES_IMAGE=arm64v8/postgres:13-alpine export JDK_VERSION=17 export NODE_VERSION=16.11.1 SUB_BUILD=PLATFORM ./gradlew build From 9bda6a7aff54f444a8feab412bc6e9f7f322fb1c Mon Sep 17 00:00:00 2001 From: "Sherif A. 
Nada" Date: Fri, 5 Nov 2021 16:19:41 -0700 Subject: [PATCH 66/83] =?UTF-8?q?=F0=9F=90=9B=20=20Bugfix:=20inject=20http?= =?UTF-8?q?=20client=20into=20server=20to=20prevent=20file=20churn=20(#768?= =?UTF-8?q?8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/io/airbyte/oauth/BaseOAuthFlow.java | 21 +++++--------- .../oauth/OAuthImplementationFactory.java | 29 ++++++++++--------- .../airbyte/oauth/flows/AsanaOAuthFlow.java | 4 +-- .../airbyte/oauth/flows/GithubOAuthFlow.java | 4 +-- .../airbyte/oauth/flows/HubspotOAuthFlow.java | 4 +-- .../oauth/flows/SalesforceOAuthFlow.java | 4 +-- .../oauth/flows/SurveymonkeyOAuthFlow.java | 4 +-- .../airbyte/oauth/flows/TrelloOAuthFlow.java | 3 +- .../facebook/FacebookMarketingOAuthFlow.java | 4 +-- .../flows/facebook/FacebookOAuthFlow.java | 4 +-- .../facebook/FacebookPagesOAuthFlow.java | 5 ++-- .../flows/facebook/InstagramOAuthFlow.java | 5 ++-- .../flows/google/GoogleAdsOAuthFlow.java | 4 +-- .../google/GoogleAnalyticsOAuthFlow.java | 4 +-- .../oauth/flows/google/GoogleOAuthFlow.java | 4 +-- .../google/GoogleSearchConsoleOAuthFlow.java | 4 +-- .../flows/google/GoogleSheetsOAuthFlow.java | 4 +-- .../FacebookOAuthFlowIntegrationTest.java | 5 ++-- .../GithubOAuthFlowIntegrationTest.java | 5 ++-- .../SalesforceOAuthFlowIntegrationTest.java | 5 +++- .../SurveymonkeyOAuthFlowIntegrationTest.java | 5 ++-- .../TrelloOAuthFlowIntegrationTest.java | 5 +++- .../HubspotOAuthFlowIntegrationTest.java | 7 +++-- .../oauth/flows/OAuthFlowIntegrationTest.java | 8 +++-- .../GoogleAdsOAuthFlowIntegrationTest.java | 5 +++- ...ogleAnalyticsOAuthFlowIntegrationTest.java | 5 +++- ...SearchConsoleOAuthFlowIntegrationTest.java | 5 +++- .../GoogleSheetsOAuthFlowIntegrationTest.java | 5 +++- .../server/ConfigurationApiFactory.java | 9 ++++-- .../java/io/airbyte/server/ServerApp.java | 6 +++- .../java/io/airbyte/server/ServerFactory.java | 10 +++++-- .../airbyte/server/apis/ConfigurationApi.java | 6 ++-- .../airbyte/server/handlers/OAuthHandler.java | 5 ++-- .../server/apis/ConfigurationApiTest.java | 4 ++- .../server/handlers/OAuthHandlerTest.java | 5 +++- 35 files changed, 130 insertions(+), 86 deletions(-) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index 9fb0a22a76250..7e03902ce0788 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -16,7 +16,6 @@ import java.net.URI; import java.net.URLEncoder; import java.net.http.HttpClient; -import java.net.http.HttpClient.Version; import java.net.http.HttpRequest; import java.net.http.HttpResponse; import java.nio.charset.StandardCharsets; @@ -55,19 +54,12 @@ public enum TOKEN_REQUEST_CONTENT_TYPE { } - protected final HttpClient httpClient; private final TOKEN_REQUEST_CONTENT_TYPE tokenReqContentType; + protected HttpClient httpClient; private final Supplier stateSupplier; - public BaseOAuthFlow(final ConfigRepository configRepository) { - this(configRepository, HttpClient.newBuilder().version(Version.HTTP_1_1).build(), BaseOAuthFlow::generateRandomState); - } - - public BaseOAuthFlow(ConfigRepository configRepository, TOKEN_REQUEST_CONTENT_TYPE tokenReqContentType) { - this(configRepository, - HttpClient.newBuilder().version(Version.HTTP_1_1).build(), - BaseOAuthFlow::generateRandomState, - tokenReqContentType); + public BaseOAuthFlow(final ConfigRepository configRepository, HttpClient 
httpClient) { + this(configRepository, httpClient, BaseOAuthFlow::generateRandomState); } public BaseOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { @@ -161,7 +153,8 @@ protected Map completeOAuthFlow(final String clientId, .header("Accept", "application/json") .build(); try { - final HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + HttpResponse response; + response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); return extractRefreshToken(Jsons.deserialize(response.body()), accessTokenUrl); } catch (final InterruptedException e) { throw new IOException("Failed to complete OAuth flow", e); @@ -235,7 +228,9 @@ private static String toUrlEncodedString(final Map body) { protected static String toJson(final Map body) { final Gson gson = new Gson(); - Type gsonType = new TypeToken>() {}.getType(); + Type gsonType = new TypeToken>() { + + }.getType(); return gson.toJson(body, gsonType); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index 289390429be96..1f269911d46bc 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -14,6 +14,7 @@ import io.airbyte.oauth.flows.google.GoogleAnalyticsOAuthFlow; import io.airbyte.oauth.flows.google.GoogleSearchConsoleOAuthFlow; import io.airbyte.oauth.flows.google.GoogleSheetsOAuthFlow; +import java.net.http.HttpClient; import java.util.Map; import java.util.UUID; @@ -21,21 +22,21 @@ public class OAuthImplementationFactory { private final Map OAUTH_FLOW_MAPPING; - public OAuthImplementationFactory(final ConfigRepository configRepository) { + public OAuthImplementationFactory(final ConfigRepository configRepository, final HttpClient httpClient) { OAUTH_FLOW_MAPPING = ImmutableMap.builder() - .put("airbyte/source-asana", new AsanaOAuthFlow(configRepository)) - .put("airbyte/source-facebook-marketing", new FacebookMarketingOAuthFlow(configRepository)) - .put("airbyte/source-facebook-pages", new FacebookPagesOAuthFlow(configRepository)) - .put("airbyte/source-github", new GithubOAuthFlow(configRepository)) - .put("airbyte/source-google-ads", new GoogleAdsOAuthFlow(configRepository)) - .put("airbyte/source-google-analytics-v4", new GoogleAnalyticsOAuthFlow(configRepository)) - .put("airbyte/source-google-search-console", new GoogleSearchConsoleOAuthFlow(configRepository)) - .put("airbyte/source-google-sheets", new GoogleSheetsOAuthFlow(configRepository)) - .put("airbyte/source-instagram", new InstagramOAuthFlow(configRepository)) - .put("airbyte/source-salesforce", new SalesforceOAuthFlow(configRepository)) - .put("airbyte/source-surveymonkey", new SurveymonkeyOAuthFlow(configRepository)) - .put("airbyte/source-trello", new TrelloOAuthFlow(configRepository)) - .put("airbyte/source-hubspot", new HubspotOAuthFlow(configRepository)) + .put("airbyte/source-asana", new AsanaOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-facebook-marketing", new FacebookMarketingOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-facebook-pages", new FacebookPagesOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-github", new GithubOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-ads", new GoogleAdsOAuthFlow(configRepository, httpClient)) + 
.put("airbyte/source-google-analytics-v4", new GoogleAnalyticsOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-search-console", new GoogleSearchConsoleOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-google-sheets", new GoogleSheetsOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-instagram", new InstagramOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-salesforce", new SalesforceOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-surveymonkey", new SurveymonkeyOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-trello", new TrelloOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-hubspot", new HubspotOAuthFlow(configRepository, httpClient)) .build(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java index 9e273b82b133d..19ee047bc6ea2 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java @@ -24,8 +24,8 @@ public class AsanaOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://app.asana.com/-/oauth_authorize"; private static final String ACCESS_TOKEN_URL = "https://app.asana.com/-/oauth_token"; - public AsanaOAuthFlow(ConfigRepository configRepository) { - super(configRepository); + public AsanaOAuthFlow(ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java index bb2e14ac11f6c..4b6f4932ea209 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/GithubOAuthFlow.java @@ -26,8 +26,8 @@ public class GithubOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://github.com/login/oauth/authorize"; private static final String ACCESS_TOKEN_URL = "https://github.com/login/oauth/access_token"; - public GithubOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GithubOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java index 7e7e81d5e239e..e1edcfbf6efab 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java @@ -19,8 +19,8 @@ public class HubspotOAuthFlow extends BaseOAuthFlow { private final String AUTHORIZE_URL = "https://app.hubspot.com/oauth/authorize"; - public HubspotOAuthFlow(ConfigRepository configRepository) { - super(configRepository); + public HubspotOAuthFlow(ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } public HubspotOAuthFlow(ConfigRepository configRepository, HttpClient httpClient, Supplier stateSupplier) { diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java index c5eec1b59075e..ea1fd7c154a72 100644 --- 
a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SalesforceOAuthFlow.java @@ -29,8 +29,8 @@ public class SalesforceOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://login.salesforce.com/services/oauth2/authorize"; private static final String ACCESS_TOKEN_URL = "https://login.salesforce.com/services/oauth2/token"; - public SalesforceOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public SalesforceOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java index 9cb40ef924928..9bf22e762a218 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/SurveymonkeyOAuthFlow.java @@ -26,8 +26,8 @@ public class SurveymonkeyOAuthFlow extends BaseOAuthFlow { private static final String AUTHORIZE_URL = "https://api.surveymonkey.com/oauth/authorize"; private static final String ACCESS_TOKEN_URL = "https://api.surveymonkey.com/oauth/token"; - public SurveymonkeyOAuthFlow(ConfigRepository configRepository) { - super(configRepository); + public SurveymonkeyOAuthFlow(ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java index 1273f46e23414..81b5db8d559b3 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/TrelloOAuthFlow.java @@ -17,6 +17,7 @@ import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.oauth.BaseOAuthConfig; import java.io.IOException; +import java.net.http.HttpClient; import java.util.Map; import java.util.UUID; @@ -38,7 +39,7 @@ public class TrelloOAuthFlow extends BaseOAuthConfig { private static final OAuthHmacSigner signer = new OAuthHmacSigner(); private final HttpTransport transport; - public TrelloOAuthFlow(final ConfigRepository configRepository) { + public TrelloOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { super(configRepository); transport = new NetHttpTransport(); } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java index 0fe9832caa4e6..afe38d7f54037 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookMarketingOAuthFlow.java @@ -13,8 +13,8 @@ public class FacebookMarketingOAuthFlow extends FacebookOAuthFlow { private static final String SCOPES = "ads_management,ads_read,read_insights"; - public FacebookMarketingOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public FacebookMarketingOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java 
b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java index d1520cb1eabf4..08e5feeb64381 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookOAuthFlow.java @@ -30,8 +30,8 @@ public abstract class FacebookOAuthFlow extends BaseOAuthFlow { private static final String ACCESS_TOKEN_URL = "https://graph.facebook.com/v12.0/oauth/access_token"; private static final String AUTH_CODE_TOKEN_URL = "https://www.facebook.com/v12.0/dialog/oauth"; - public FacebookOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public FacebookOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java index 1f5e596a18b56..8f8b832ae76e1 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/FacebookPagesOAuthFlow.java @@ -5,13 +5,14 @@ package io.airbyte.oauth.flows.facebook; import io.airbyte.config.persistence.ConfigRepository; +import java.net.http.HttpClient; public class FacebookPagesOAuthFlow extends FacebookOAuthFlow { private static final String SCOPES = "pages_manage_ads,pages_manage_metadata,pages_read_engagement,pages_read_user_content"; - public FacebookPagesOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public FacebookPagesOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @Override diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java index f4478960a9797..20edf93f3f7e0 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/facebook/InstagramOAuthFlow.java @@ -5,14 +5,15 @@ package io.airbyte.oauth.flows.facebook; import io.airbyte.config.persistence.ConfigRepository; +import java.net.http.HttpClient; // Instagram Graph API require Facebook API User token public class InstagramOAuthFlow extends FacebookMarketingOAuthFlow { private static final String SCOPES = "ads_management,instagram_basic,instagram_manage_insights,read_insights"; - public InstagramOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public InstagramOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @Override diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java index eedbaf5036a06..5fda937bf6051 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlow.java @@ -16,8 +16,8 @@ public class GoogleAdsOAuthFlow extends GoogleOAuthFlow { @VisibleForTesting static final String SCOPE_URL = "https://www.googleapis.com/auth/adwords"; - public GoogleAdsOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public 
GoogleAdsOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java index 8e26336783ae8..40a6322a50654 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlow.java @@ -15,8 +15,8 @@ public class GoogleAnalyticsOAuthFlow extends GoogleOAuthFlow { public static final String SCOPE_URL = "https://www.googleapis.com/auth/analytics.readonly"; - public GoogleAnalyticsOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleAnalyticsOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java index 1c460101f8544..500309072d467 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleOAuthFlow.java @@ -23,8 +23,8 @@ public abstract class GoogleOAuthFlow extends BaseOAuthFlow { private static final String ACCESS_TOKEN_URL = "https://oauth2.googleapis.com/token"; - public GoogleOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java index 77973683446c7..a4fa700e8d56b 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlow.java @@ -19,8 +19,8 @@ public class GoogleSearchConsoleOAuthFlow extends GoogleOAuthFlow { @VisibleForTesting static final String SCOPE_URL = "https://www.googleapis.com/auth/webmasters.readonly"; - public GoogleSearchConsoleOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleSearchConsoleOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java index 11e2dd08e88d9..bff40fc9ef389 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlow.java @@ -18,8 +18,8 @@ public class GoogleSheetsOAuthFlow extends GoogleOAuthFlow { @VisibleForTesting static final String SCOPE_URL = "https://www.googleapis.com/auth/spreadsheets.readonly https://www.googleapis.com/auth/drive.readonly"; - public GoogleSheetsOAuthFlow(final ConfigRepository configRepository) { - super(configRepository); + public GoogleSheetsOAuthFlow(final ConfigRepository configRepository, HttpClient httpClient) { + 
super(configRepository, httpClient); } @VisibleForTesting diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java index 53ccd82061154..16f2820cd3c7b 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/FacebookOAuthFlowIntegrationTest.java @@ -17,6 +17,7 @@ import io.airbyte.oauth.flows.OAuthFlowIntegrationTest; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -36,8 +37,8 @@ protected Path getCredentialsPath() { } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new FacebookMarketingOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new FacebookMarketingOAuthFlow(configRepository, httpClient); } @BeforeEach diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java index 797af710644f2..9677a82a5e5c0 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/GithubOAuthFlowIntegrationTest.java @@ -16,6 +16,7 @@ import io.airbyte.oauth.OAuthFlowImplementation; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -36,8 +37,8 @@ protected Path getCredentialsPath() { } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new GithubOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new GithubOAuthFlow(configRepository, httpClient); } @Override diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java index 536ab196886d5..0cee2aaca757d 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SalesforceOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class SalesforceOAuthFlowIntegrationTest { private SalesforceOAuthFlow salesforceOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - salesforceOAuthFlow = new SalesforceOAuthFlow(configRepository); + 
httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + salesforceOAuthFlow = new SalesforceOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(8000), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java index 60961ec15936c..5791ed62b1a4c 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/SurveymonkeyOAuthFlowIntegrationTest.java @@ -16,6 +16,7 @@ import io.airbyte.oauth.OAuthFlowImplementation; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -35,8 +36,8 @@ protected Path getCredentialsPath() { } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new SurveymonkeyOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new SurveymonkeyOAuthFlow(configRepository, httpClient); } @BeforeEach diff --git a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java index 66fb6691347f8..57a0d2e883e2e 100644 --- a/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io.airbyte.oauth.flows/TrelloOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class TrelloOAuthFlowIntegrationTest { private TrelloOAuthFlow trelloOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - trelloOAuthFlow = new TrelloOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + trelloOAuthFlow = new TrelloOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(8000), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java index bbe96e57956a2..234f31454dfb7 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/HubspotOAuthFlowIntegrationTest.java @@ -16,6 +16,7 @@ import io.airbyte.oauth.OAuthFlowImplementation; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import 
java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -31,8 +32,8 @@ protected Path getCredentialsPath() { } @Override - protected OAuthFlowImplementation getFlowObject(ConfigRepository configRepository) { - return new HubspotOAuthFlow(configRepository); + protected OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient) { + return new HubspotOAuthFlow(configRepository, httpClient); } @Test @@ -50,7 +51,7 @@ public void testFullOAuthFlow() throws InterruptedException, ConfigNotFoundExcep .put("client_id", credentialsJson.get("credentials").get("client_id").asText()) .put("client_secret", credentialsJson.get("credentials").get("client_secret").asText()) .build())))); - var flowObject = getFlowObject(configRepository); + var flowObject = getFlowImplementation(configRepository, httpClient); final String url = flowObject.getSourceConsentUrl(workspaceId, definitionId, REDIRECT_URL); LOGGER.info("Waiting for user consent at: {}", url); // TODO: To automate, start a selenium job to navigate to the Consent URL and click on allowing diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java index d9124f645fd60..67be077899e4b 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/OAuthFlowIntegrationTest.java @@ -14,6 +14,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -33,6 +34,7 @@ public abstract class OAuthFlowIntegrationTest { protected static final String REDIRECT_URL = "http://localhost/auth_flow"; protected static final int SERVER_LISTENING_PORT = 80; + protected HttpClient httpClient; protected ConfigRepository configRepository; protected OAuthFlowImplementation flow; protected HttpServer server; @@ -42,7 +44,7 @@ protected Path getCredentialsPath() { return Path.of("secrets/config.json"); }; - protected abstract OAuthFlowImplementation getFlowObject(ConfigRepository configRepository); + protected abstract OAuthFlowImplementation getFlowImplementation(ConfigRepository configRepository, HttpClient httpClient); @BeforeEach public void setup() throws IOException { @@ -51,8 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - - flow = this.getFlowObject(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + flow = this.getFlowImplementation(configRepository, httpClient); System.out.println(getServerListeningPort()); server = HttpServer.create(new InetSocketAddress(getServerListeningPort()), 0); diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java index a9c1ddfb31d50..29d6d909dba7d 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAdsOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import 
java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleAdsOAuthFlowIntegrationTest { private GoogleAdsOAuthFlow googleAdsOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleAdsOAuthFlow = new GoogleAdsOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleAdsOAuthFlow = new GoogleAdsOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java index b7683d8c34921..499f8ad56c0b4 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleAnalyticsOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleAnalyticsOAuthFlowIntegrationTest { private GoogleAnalyticsOAuthFlow googleAnalyticsOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleAnalyticsOAuthFlow = new GoogleAnalyticsOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleAnalyticsOAuthFlow = new GoogleAnalyticsOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java index 92812c139bd79..886ba1c91f835 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSearchConsoleOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleSearchConsoleOAuthFlowIntegrationTest { private GoogleSearchConsoleOAuthFlow googleSearchConsoleOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws 
IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleSearchConsoleOAuthFlow = new GoogleSearchConsoleOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleSearchConsoleOAuthFlow = new GoogleSearchConsoleOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java index 3d4a84b44ee30..24f4ae1d9a94a 100644 --- a/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java +++ b/airbyte-oauth/src/test-integration/java/io/airbyte/oauth/flows/google/GoogleSheetsOAuthFlowIntegrationTest.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; +import java.net.http.HttpClient; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; @@ -43,6 +44,7 @@ public class GoogleSheetsOAuthFlowIntegrationTest { private GoogleSheetsOAuthFlow googleSheetsOAuthFlow; private HttpServer server; private ServerHandler serverHandler; + private HttpClient httpClient; @BeforeEach public void setup() throws IOException { @@ -51,7 +53,8 @@ public void setup() throws IOException { "Must provide path to a oauth credentials file."); } configRepository = mock(ConfigRepository.class); - googleSheetsOAuthFlow = new GoogleSheetsOAuthFlow(configRepository); + httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + googleSheetsOAuthFlow = new GoogleSheetsOAuthFlow(configRepository, httpClient); server = HttpServer.create(new InetSocketAddress(80), 0); server.setExecutor(null); // creates a default executor diff --git a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java index 115031c7c4d7a..bdc9dd0105f4d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ConfigurationApiFactory.java @@ -17,6 +17,7 @@ import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.net.http.HttpClient; import java.nio.file.Path; import java.util.Map; import org.glassfish.hk2.api.Factory; @@ -40,6 +41,7 @@ public class ConfigurationApiFactory implements Factory { private static Path workspaceRoot; private static String webappUrl; private static AirbyteVersion airbyteVersion; + private static HttpClient httpClient; public static void setValues( final WorkflowServiceStubs temporalService, @@ -57,7 +59,8 @@ public static void setValues( final LogConfigs logConfigs, final String webappUrl, final AirbyteVersion airbyteVersion, - final Path workspaceRoot) { + final Path workspaceRoot, + final HttpClient httpClient) { ConfigurationApiFactory.configRepository = configRepository; ConfigurationApiFactory.jobPersistence = jobPersistence; ConfigurationApiFactory.seed = seed; @@ -74,6 +77,7 @@ public static void setValues( ConfigurationApiFactory.workspaceRoot = workspaceRoot; 
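
The hunks above give every OAuth flow a shared `java.net.http.HttpClient`, and this `ConfigurationApiFactory` change carries that client from the server wiring down to the handlers instead of letting each flow build its own. As a reading aid only, here is a minimal sketch of that constructor-injection chain; the names `InjectionSketch`, `Flow`, and `Handler` are made up for illustration and are not the real Airbyte classes, and only the `HttpClient.newBuilder().version(HTTP_1_1)` call mirrors the diff.

```java
import java.net.http.HttpClient;

// A minimal sketch (not Airbyte code): one HttpClient is built at startup and passed
// down by constructor injection, rather than each OAuth flow creating its own client.
final class InjectionSketch {

  // Stands in for an OAuth flow: it receives the shared client instead of building one.
  record Flow(HttpClient httpClient) {}

  // Stands in for the handler/factory layer that the server wires up.
  record Handler(Flow flow) {

    static Handler create(final HttpClient httpClient) {
      return new Handler(new Flow(httpClient));
    }

  }

  public static void main(final String[] args) {
    // Created exactly once (HTTP/1.1, as in the patch) and then shared everywhere.
    final HttpClient httpClient = HttpClient.newBuilder()
        .version(HttpClient.Version.HTTP_1_1)
        .build();

    final Handler handler = Handler.create(httpClient);
    System.out.println("shared client reused: " + (handler.flow().httpClient() == httpClient));
  }

}
```

Reusing a single client keeps connection pools and open file descriptors bounded across OAuth requests, which is presumably the "file churn" the commit title refers to.
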
ConfigurationApiFactory.webappUrl = webappUrl; ConfigurationApiFactory.airbyteVersion = airbyteVersion; + ConfigurationApiFactory.httpClient = httpClient; } @Override @@ -95,7 +99,8 @@ public ConfigurationApi provide() { ConfigurationApiFactory.logConfigs, ConfigurationApiFactory.webappUrl, ConfigurationApiFactory.airbyteVersion, - ConfigurationApiFactory.workspaceRoot); + ConfigurationApiFactory.workspaceRoot, + ConfigurationApiFactory.httpClient); } @Override diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 83eaf151822e8..9055c193060be 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -51,6 +51,7 @@ import io.airbyte.workers.temporal.TemporalUtils; import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; +import java.net.http.HttpClient; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -229,6 +230,8 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con jobPersistence, configs); + final HttpClient httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_1_1).build(); + if (airbyteDatabaseVersion.isPresent() && AirbyteVersion.isCompatible(airbyteVersion, airbyteDatabaseVersion.get())) { LOGGER.info("Starting server..."); @@ -249,7 +252,8 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con configs.getLogConfigs(), configs.getWebappUrl(), configs.getAirbyteVersion(), - configs.getWorkspaceRoot()); + configs.getWorkspaceRoot(), + httpClient); } else { LOGGER.info("Start serving version mismatch errors. Automatic migration either failed or didn't run"); return new VersionMismatchServer(airbyteVersion, airbyteDatabaseVersion.orElseThrow(), PORT); diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java index 28e1f302afcc7..1bf5528b9809e 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerFactory.java @@ -17,6 +17,7 @@ import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.server.apis.ConfigurationApi; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.net.http.HttpClient; import java.nio.file.Path; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -37,7 +38,8 @@ ServerRunnable create(SchedulerJobClient schedulerJobClient, LogConfigs logConfigs, String webappUrl, AirbyteVersion airbyteVersion, - Path workspaceRoot); + Path workspaceRoot, + HttpClient httpClient); class Api implements ServerFactory { @@ -55,7 +57,8 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, final LogConfigs logConfigs, final String webappUrl, final AirbyteVersion airbyteVersion, - final Path workspaceRoot) { + final Path workspaceRoot, + final HttpClient httpClient) { // set static values for factory ConfigurationApiFactory.setValues( temporalService, @@ -73,7 +76,8 @@ public ServerRunnable create(final SchedulerJobClient schedulerJobClient, logConfigs, webappUrl, airbyteVersion, - workspaceRoot); + workspaceRoot, + httpClient); // server configurations final Set> componentClasses = Set.of(ConfigurationApi.class); diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java 
b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index 62ccdebe87e66..2820c89b77040 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -120,6 +120,7 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.File; import java.io.IOException; +import java.net.http.HttpClient; import java.nio.file.Path; import java.util.Map; @@ -162,7 +163,8 @@ public ConfigurationApi(final ConfigRepository configRepository, final LogConfigs logConfigs, final String webappUrl, final AirbyteVersion airbyteVersion, - final Path workspaceRoot) { + final Path workspaceRoot, + HttpClient httpClient) { this.workerEnvironment = workerEnvironment; this.logConfigs = logConfigs; this.workspaceRoot = workspaceRoot; @@ -191,7 +193,7 @@ public ConfigurationApi(final ConfigRepository configRepository, sourceHandler = new SourceHandler(configRepository, schemaValidator, specFetcher, connectionsHandler); workspacesHandler = new WorkspacesHandler(configRepository, connectionsHandler, destinationHandler, sourceHandler); jobHistoryHandler = new JobHistoryHandler(jobPersistence, workerEnvironment, logConfigs); - oAuthHandler = new OAuthHandler(configRepository, trackingClient); + oAuthHandler = new OAuthHandler(configRepository, httpClient, trackingClient); webBackendConnectionsHandler = new WebBackendConnectionsHandler( connectionsHandler, sourceHandler, diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java index 7fa5565bf7765..2d8e0a4b2b54d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/OAuthHandler.java @@ -25,6 +25,7 @@ import io.airbyte.scheduler.persistence.job_tracker.TrackingMetadata; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.util.Map; import java.util.UUID; import org.slf4j.Logger; @@ -38,9 +39,9 @@ public class OAuthHandler { private final OAuthImplementationFactory oAuthImplementationFactory; private final TrackingClient trackingClient; - public OAuthHandler(final ConfigRepository configRepository, final TrackingClient trackingClient) { + public OAuthHandler(final ConfigRepository configRepository, final HttpClient httpClient, final TrackingClient trackingClient) { this.configRepository = configRepository; - this.oAuthImplementationFactory = new OAuthImplementationFactory(configRepository); + this.oAuthImplementationFactory = new OAuthImplementationFactory(configRepository, httpClient); this.trackingClient = trackingClient; } diff --git a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java index 48e0dc8094570..b71d05eda52de 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/apis/ConfigurationApiTest.java @@ -21,6 +21,7 @@ import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.persistence.JobPersistence; import io.temporal.serviceclient.WorkflowServiceStubs; +import java.net.http.HttpClient; import java.nio.file.Path; import org.junit.jupiter.api.Test; @@ -47,7 +48,8 @@ void testImportDefinitions() { LogConfiguration.EMPTY, 
"http://localhost", new AirbyteVersion("0.1.0-alpha"), - Path.of("")); + Path.of(""), + mock(HttpClient.class)); assertTrue(configurationApi.canImportDefinitons()); } diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java index 3ca87acd7cb05..67c74aea83179 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/OAuthHandlerTest.java @@ -17,6 +17,7 @@ import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.net.http.HttpClient; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -32,12 +33,14 @@ class OAuthHandlerTest { ConfigRepository configRepository; OAuthHandler handler; TrackingClient trackingClient; + private HttpClient httpClient; @BeforeEach public void init() { configRepository = Mockito.mock(ConfigRepository.class); trackingClient = mock(TrackingClient.class); - handler = new OAuthHandler(configRepository, trackingClient); + httpClient = Mockito.mock(HttpClient.class); + handler = new OAuthHandler(configRepository, httpClient, trackingClient); } @Test From 9791a14dda4c33599b9c1ce369d259340eb3c99f Mon Sep 17 00:00:00 2001 From: Yuhui Shi <74702693+yuhuishi-convect@users.noreply.github.com> Date: Fri, 5 Nov 2021 16:53:57 -0700 Subject: [PATCH 67/83] =?UTF-8?q?=F0=9F=8E=89=20Source=20Shopify:=20Add=20?= =?UTF-8?q?`FulfillmentOrders`=20and=20`Fulfillments`=20streams=20(#7107)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add FulfillmentOrder stream * Add fulfillments stream * Fix schema validation error * Bump docker version and update doc * Apply gradlew format * Fix source_definition * Fix doc after rebase * Fix format after rebase * Remove enum type in schemas * bump version * run seed config again Co-authored-by: Marcos Marx --- .../9da77001-af33-4bcd-be46-6252bf9342b9.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 8 +- .../connectors/source-shopify/Dockerfile | 2 +- .../integration_tests/abnormal_state.json | 6 + .../integration_tests/configured_catalog.json | 24 ++ .../integration_tests/state.json | 6 + .../schemas/fulfillment_orders.json | 180 ++++++++++++ .../source_shopify/schemas/fulfillments.json | 269 ++++++++++++++++++ .../source-shopify/source_shopify/source.py | 31 ++ docs/integrations/sources/shopify.md | 3 + 11 files changed, 527 insertions(+), 6 deletions(-) create mode 100644 airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json create mode 100644 airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json index 90ce4d4c9d5e9..aeddeffbaf6e4 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/9da77001-af33-4bcd-be46-6252bf9342b9.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "9da77001-af33-4bcd-be46-6252bf9342b9", "name": "Shopify", "dockerRepository": 
"airbyte/source-shopify", - "dockerImageTag": "0.1.21", + "dockerImageTag": "0.1.22", "documentationUrl": "https://docs.airbyte.io/integrations/sources/shopify" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index db629e47be54d..60f3da68e88fb 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -482,7 +482,7 @@ - name: Shopify sourceDefinitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 dockerRepository: airbyte/source-shopify - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.22 documentationUrl: https://docs.airbyte.io/integrations/sources/shopify sourceType: api - name: Short.io diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index cafaf0adb6ad1..6aa40755838be 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4857,7 +4857,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-salesforce:0.1.2" +- dockerImage: "airbyte/source-salesforce:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/salesforce" connectionSpecification: @@ -4889,7 +4889,9 @@ airbyte_secret: true start_date: description: "UTC date and time in the format 2017-01-25T00:00:00Z. Any\ - \ data before this date will not be replicated." + \ data before this date will not be replicated. Priority for filtering\ + \ by `updated` fields, and only then by `created` fields if they are available\ + \ for stream." type: "string" pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" examples: @@ -4946,7 +4948,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-shopify:0.1.21" +- dockerImage: "airbyte/source-shopify:0.1.22" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-shopify/Dockerfile b/airbyte-integrations/connectors/source-shopify/Dockerfile index b20a97b483400..ca1eaaccfbaf7 100644 --- a/airbyte-integrations/connectors/source-shopify/Dockerfile +++ b/airbyte-integrations/connectors/source-shopify/Dockerfile @@ -28,5 +28,5 @@ COPY source_shopify ./source_shopify ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.22 LABEL io.airbyte.name=airbyte/source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json index a103ca3dd890b..d4bcc46c5271f 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json @@ -46,5 +46,11 @@ }, "inventory_levels": { "updated_at": "2024-07-08T05:40:38-07:00" + }, + "fulfillment_orders": { + "id": 9991307599038 + }, + "fulfillments": { + "updated_at": "2024-07-08T05:40:38-07:00" } } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json 
b/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json index c9ebbd2ca0db9..260bbe53a1a6b 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json @@ -191,6 +191,30 @@ "sync_mode": "incremental", "cursor_field": ["updated_at"], "destination_sync_mode": "append" + }, + { + "stream": { + "name": "fulfillment_orders", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["id"] + }, + "sync_mode": "incremental", + "cursor_field": ["id"], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "fulfillments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updated_at"] + }, + "sync_mode": "incremental", + "cursor_field": ["updated_at"], + "destination_sync_mode": "append" } ] } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json index 65ca1ccdfed26..03f98c522caa5 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json @@ -46,5 +46,11 @@ }, "inventory_levels": { "updated_at": "2021-09-10T06:48:10-07:00" + }, + "fulfillment_orders": { + "id": 123 + }, + "fulfillments": { + "updated_at": "2021-09-10T06:48:10-07:00" } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json new file mode 100644 index 0000000000000..16987cbedd703 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillment_orders.json @@ -0,0 +1,180 @@ +{ + "type": "object", + "properties": { + "assigned_location_id": { + "type": ["null", "integer"] + }, + "destination": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "address1": { + "type": ["null", "string"] + }, + "address2": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "company": { + "type": ["null", "string"] + }, + "country": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "first_name": { + "type": ["null", "string"] + }, + "last_name": { + "type": ["null", "string"] + }, + "phone": { + "type": ["null", "string"] + }, + "province": { + "type": ["null", "string"] + }, + "zip": { + "type": ["null", "string"] + } + } + }, + "delivery_method": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "method_type": { + "type": ["null", "string"] + } + } + }, + "fulfilled_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "fulfillment_holds": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "reason": { + "type": ["null", "string"] + }, + "reason_notes": { + "type": ["null", "string"] + } + } + } + }, + "id": { + "type": ["null", "integer"] + }, + "line_items": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "shop_id": { + "type": ["null", "integer"] + }, + "fullfillment_order_id": { + "type": 
["null", "integer"] + }, + "line_item_id": { + "type": ["null", "integer"] + }, + "inventory_item_id": { + "type": ["null", "integer"] + }, + "quantity": { + "type": ["null", "integer"] + }, + "fulfillable_quantity": { + "type": ["null", "integer"] + }, + "variant_id": { + "type": ["null", "integer"] + } + } + } + }, + "order_id": { + "type": ["null", "integer"] + }, + "request_status": { + "type": ["null", "string"] + }, + "shop_id": { + "type": ["null", "integer"] + }, + "status": { + "type": ["null", "string"] + }, + "supported_actions": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "merchant_requests": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "message": { + "type": ["null", "string"] + }, + "kind": { + "type": ["null", "string"] + }, + "request_options": { + "type": ["null", "object"] + } + } + } + }, + "assigned_location": { + "type": ["null", "object"], + "properties": { + "address1": { + "type": ["null", "string"] + }, + "address2": { + "type": ["null", "string"] + }, + "city": { + "type": ["null", "string"] + }, + "country_code": { + "type": ["null", "string"] + }, + "location_id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "phone": { + "type": ["null", "string"] + }, + "province": { + "type": ["null", "string"] + }, + "zip": { + "type": ["null", "string"] + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json new file mode 100644 index 0000000000000..05cdc8b4be903 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/fulfillments.json @@ -0,0 +1,269 @@ +{ + "type": "object", + "properties": { + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "id": { + "type": ["null", "integer"] + }, + "location_id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "notify_customer": { + "type": ["null", "boolean"] + }, + "order_id": { + "type": ["null", "integer"] + }, + "origin_address": { + "type": ["null", "object"], + "properties": { + "address1": { + "type": "string" + }, + "address2": { + "type": "string" + }, + "city": { + "type": "string" + }, + "country_code": { + "type": "string" + }, + "province_code": { + "type": "string" + }, + "zip": { + "type": "string" + } + } + }, + "receipt": { + "type": ["null", "object"], + "properties": { + "testcase": { + "type": ["null", "boolean"] + }, + "authorization": { + "type": ["null", "string"] + } + } + }, + "service": { + "type": ["null", "string"] + }, + "shipment_status": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "tracking_company": { + "type": ["null", "string"] + }, + "tracking_numbers": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "tracking_urls": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "variant_inventory_management": { + "type": ["null", "string"] + }, + "line_items": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "variant_id": { + "type": ["null", "integer"] + }, + "title": { + "type": ["null", "string"] + }, + "quantity": { + "type": ["null", "integer"] + }, + 
"price": { + "type": ["null", "string"] + }, + "grams": { + "type": ["null", "number"] + }, + "sku": { + "type": ["null", "string"] + }, + "variant_title": { + "type": ["null", "string"] + }, + "vendor": { + "type": ["null", "string"] + }, + "fulfillment_service": { + "type": ["null", "string"] + }, + "product_id": { + "type": ["null", "integer"] + }, + "requires_shipping": { + "type": ["null", "boolean"] + }, + "taxable": { + "type": ["null", "boolean"] + }, + "gift_card": { + "type": ["null", "boolean"] + }, + "name": { + "type": ["null", "string"] + }, + "variant_inventory_management": { + "type": ["null", "string"] + }, + "properties": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "product_exists": { + "type": ["null", "boolean"] + }, + "fulfillable_quantity": { + "type": ["null", "integer"] + }, + "total_discount": { + "type": ["null", "string"] + }, + "fulfillment_status": { + "type": ["null", "string"] + }, + "fulfillment_line_item_id": { + "type": ["null", "integer"] + }, + "tax_lines": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "price": { + "type": ["null", "number"] + }, + "rate": { + "type": ["null", "number"] + }, + "title": { + "type": ["null", "string"] + } + } + } + } + } + } + }, + "duties": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "harmonized_system_code": { + "type": ["null", "string"] + }, + "country_code_of_origin": { + "type": ["null", "string"] + }, + "shop_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + }, + "presentment_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + }, + "tax_lines": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "title": { + "type": ["null", "string"] + }, + "price": { + "type": ["null", "string"] + }, + "rate": { + "type": ["null", "number"] + }, + "price_set": { + "type": ["null", "object"], + "properties": { + "shop_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + }, + "presentment_money": { + "type": ["null", "object"], + "properties": { + "amount": { + "type": ["null", "string"] + }, + "currency_code": { + "type": ["null", "string"] + } + } + } + } + }, + "channel_liable": { + "type": ["null", "boolean"] + } + } + } + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py index 17e348790ec6a..e011b74dc3567 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py @@ -365,6 +365,35 @@ def generate_key(record): ) +class FulfillmentOrders(ChildSubstream): + + parent_stream_class: object = Orders + slice_key = "order_id" + + data_field = "fulfillment_orders" + + cursor_field = "id" + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + order_id = stream_slice[self.slice_key] + return f"orders/{order_id}/{self.data_field}.json" + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: 
Mapping[str, Any]) -> Mapping[str, Any]: + return {self.cursor_field: max(latest_record.get(self.cursor_field, 0), current_stream_state.get(self.cursor_field, 0))} + + +class Fulfillments(ChildSubstream): + + parent_stream_class: object = Orders + slice_key = "order_id" + + data_field = "fulfillments" + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: + order_id = stream_slice[self.slice_key] + return f"orders/{order_id}/{self.data_field}.json" + + class SourceShopify(AbstractSource): def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, any]: @@ -407,4 +436,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: DiscountCodes(config), Locations(config), InventoryLevels(config), + FulfillmentOrders(config), + Fulfillments(config), ] diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md index db1b2d51560ad..4a28983a70800 100644 --- a/docs/integrations/sources/shopify.md +++ b/docs/integrations/sources/shopify.md @@ -37,6 +37,8 @@ This Source is capable of syncing the following core Streams: * [Price Rules](https://help.shopify.com/en/api/reference/discounts/pricerule) * [Locations](https://shopify.dev/api/admin-rest/2021-10/resources/location) * [InventoryLevels](https://shopify.dev/api/admin-rest/2021-10/resources/inventorylevel) +* [Fulfillment Orders](https://shopify.dev/api/admin-rest/2021-07/resources/fulfillmentorder) +* [Fulfillments](https://shopify.dev/api/admin-rest/2021-07/resources/fulfillment) #### NOTE: @@ -97,6 +99,7 @@ This connector support both: `OAuth 2.0` and `API PASSWORD` (for private applica | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.22 | 2021-10-18 | [7101](https://github.com/airbytehq/airbyte/pull/7107) | Added FulfillmentOrders, Fulfillments streams | | 0.1.21 | 2021-10-14 | [7382](https://github.com/airbytehq/airbyte/pull/7382) | Fixed `InventoryLevels` primary key | | 0.1.20 | 2021-10-14 | [7063](https://github.com/airbytehq/airbyte/pull/7063) | Added `Location` and `InventoryLevels` as streams | | 0.1.19 | 2021-10-11 | [6951](https://github.com/airbytehq/airbyte/pull/6951) | Added support of `OAuth 2.0` authorisation option | From 058c8f802ffd1f55b102e3a8e1a6271c0aab190f Mon Sep 17 00:00:00 2001 From: Lake Mossman Date: Fri, 5 Nov 2021 17:34:11 -0700 Subject: [PATCH 68/83] Backfill specs into definitions (#7616) --- .../java/io/airbyte/server/ServerApp.java | 123 ++++++++++++++ .../io/airbyte/server/BackfillSpecTest.java | 159 ++++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index 9055c193060be..b05fb8b22451f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -4,17 +4,24 @@ package io.airbyte.server; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; import io.airbyte.analytics.Deployment; import io.airbyte.analytics.TrackingClient; import io.airbyte.analytics.TrackingClientSingleton; +import io.airbyte.api.model.LogRead; +import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.version.AirbyteVersion; import io.airbyte.config.Configs; import 
io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.EnvConfigs; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.config.StandardSourceDefinition; import io.airbyte.config.StandardWorkspace; import io.airbyte.config.helpers.LogClientSingleton; +import io.airbyte.config.helpers.LogConfigs; import io.airbyte.config.init.YamlSeedConfigPersistence; import io.airbyte.config.persistence.ConfigPersistence; import io.airbyte.config.persistence.ConfigRepository; @@ -27,17 +34,20 @@ import io.airbyte.db.instance.configs.ConfigsDatabaseMigrator; import io.airbyte.db.instance.jobs.JobsDatabaseInstance; import io.airbyte.db.instance.jobs.JobsDatabaseMigrator; +import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.scheduler.client.BucketSpecCacheSchedulerClient; import io.airbyte.scheduler.client.DefaultSchedulerJobClient; import io.airbyte.scheduler.client.DefaultSynchronousSchedulerClient; import io.airbyte.scheduler.client.SchedulerJobClient; import io.airbyte.scheduler.client.SpecCachingSynchronousSchedulerClient; +import io.airbyte.scheduler.client.SynchronousResponse; import io.airbyte.scheduler.client.SynchronousSchedulerClient; import io.airbyte.scheduler.persistence.DefaultJobCreator; import io.airbyte.scheduler.persistence.DefaultJobPersistence; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.job_factory.OAuthConfigSupplier; import io.airbyte.scheduler.persistence.job_tracker.JobTracker; +import io.airbyte.server.converters.JobConverter; import io.airbyte.server.converters.SpecFetcher; import io.airbyte.server.errors.InvalidInputExceptionMapper; import io.airbyte.server.errors.InvalidJsonExceptionMapper; @@ -238,6 +248,15 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con runFlywayMigration(configs, configDatabase, jobDatabase); configPersistence.loadData(seed); + // todo (lmossman) - this will only exist temporarily to ensure all definitions contain specs. It + // will be removed after the faux major version bump + migrateAllDefinitionsToContainSpec( + configRepository, + cachingSchedulerClient, + trackingClient, + configs.getWorkerEnvironment(), + configs.getLogConfigs()); + return apiFactory.create( schedulerJobClient, cachingSchedulerClient, @@ -260,6 +279,110 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con } } + /** + * Check that each spec in the database has a spec. If it doesn't, add it. If it can't be added, + * track the failure in Segment. The goal is to try to end up in a state where all definitions in + * the db contain specs, and to understand what is stopping us from getting there. 
+ * + * @param configRepository - access to the db + * @param schedulerClient - scheduler client so that specs can be fetched as needed + * @param trackingClient + * @param workerEnvironment + * @param logConfigs + */ + @VisibleForTesting + static void migrateAllDefinitionsToContainSpec(final ConfigRepository configRepository, + final SynchronousSchedulerClient schedulerClient, + final TrackingClient trackingClient, + final WorkerEnvironment workerEnvironment, + final LogConfigs logConfigs) + throws JsonValidationException, IOException { + final JobConverter jobConverter = new JobConverter(workerEnvironment, logConfigs); + for (final StandardSourceDefinition sourceDef : configRepository.listStandardSourceDefinitions()) { + try { + if (sourceDef.getSpec() == null) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Source Definition {} does not have a spec. Attempting to retrieve spec...", + sourceDef.getName()); + final SynchronousResponse getSpecJob = schedulerClient + .createGetSpecJob(sourceDef.getDockerRepository() + ":" + sourceDef.getDockerImageTag()); + if (getSpecJob.isSuccess()) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Source Definition {} was successfully retrieved. Writing to the db...", + sourceDef.getName()); + final StandardSourceDefinition updatedDef = Jsons.clone(sourceDef).withSpec(getSpecJob.getOutput()); + configRepository.writeStandardSourceDefinition(updatedDef); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Source Definition {} was successfully written to the db record.", + sourceDef.getName()); + } else { + final LogRead logRead = jobConverter.getLogRead(getSpecJob.getMetadata().getLogPath()); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Failed to retrieve spec for Source Definition {}. Logs: {}", + sourceDef.getName(), + logRead.toString()); + throw new RuntimeException(String.format( + "Failed to retrieve spec for Source Definition %s. Logs: %s", + sourceDef.getName(), + logRead.toString())); + } + } + } catch (final Exception e) { + trackSpecBackfillFailure(trackingClient, configRepository, sourceDef.getDockerRepository(), sourceDef.getDockerImageTag(), e); + } + } + + for (final StandardDestinationDefinition destDef : configRepository.listStandardDestinationDefinitions()) { + try { + if (destDef.getSpec() == null) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Destination Definition {} does not have a spec. Attempting to retrieve spec...", + destDef.getName()); + final SynchronousResponse getSpecJob = schedulerClient + .createGetSpecJob(destDef.getDockerRepository() + ":" + destDef.getDockerImageTag()); + if (getSpecJob.isSuccess()) { + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Destination Definition {} was successfully retrieved. Writing to the db...", + destDef.getName()); + final StandardDestinationDefinition updatedDef = Jsons.clone(destDef).withSpec(getSpecJob.getOutput()); + configRepository.writeStandardDestinationDefinition(updatedDef); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Spec for Destination Definition {} was successfully written to the db record.", + destDef.getName()); + } else { + final LogRead logRead = jobConverter.getLogRead(getSpecJob.getMetadata().getLogPath()); + LOGGER.info( + "migrateAllDefinitionsToContainSpec - Failed to retrieve spec for Destination Definition {}. Logs: {}", + destDef.getName(), + logRead.toString()); + throw new RuntimeException(String.format( + "Failed to retrieve spec for Destination Definition %s. 
Logs: %s", + destDef.getName(), + logRead.toString())); + } + } + } catch (final Exception e) { + trackSpecBackfillFailure(trackingClient, configRepository, destDef.getDockerRepository(), destDef.getDockerImageTag(), e); + } + } + } + + private static void trackSpecBackfillFailure(final TrackingClient trackingClient, + final ConfigRepository configRepository, + final String dockerRepo, + final String dockerImageTag, + final Exception exception) + throws JsonValidationException, IOException { + // There is guaranteed to be at least one workspace, because the getServer() function enforces that + final UUID workspaceId = configRepository.listStandardWorkspaces(true).get(0).getWorkspaceId(); + + final ImmutableMap metadata = ImmutableMap.of( + "docker_image_name", dockerRepo, + "docker_image_tag", dockerImageTag, + "exception", exception); + trackingClient.track(workspaceId, "failed_spec_backfill", metadata); + } + @Deprecated @SuppressWarnings({"DeprecatedIsStillUsed"}) private static Optional runFileMigration(final AirbyteVersion airbyteVersion, diff --git a/airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java b/airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java new file mode 100644 index 0000000000000..541eb3c4a6d31 --- /dev/null +++ b/airbyte-server/src/test/java/io/airbyte/server/BackfillSpecTest.java @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.server; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.airbyte.analytics.TrackingClient; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.Configs.WorkerEnvironment; +import io.airbyte.config.JobConfig.ConfigType; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.config.StandardSourceDefinition; +import io.airbyte.config.StandardWorkspace; +import io.airbyte.config.helpers.LogConfigs; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.scheduler.client.SynchronousJobMetadata; +import io.airbyte.scheduler.client.SynchronousResponse; +import io.airbyte.scheduler.client.SynchronousSchedulerClient; +import io.airbyte.validation.json.JsonValidationException; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class BackfillSpecTest { + + private static final String SOURCE_DOCKER_REPO = "docker-repo/source"; + private static final String DEST_DOCKER_REPO = "docker-repo/destination"; + private static final String DOCKER_IMAGE_TAG = "tag"; + private static final String FAILED_SPEC_BACKFILL_ACTION = "failed_spec_backfill"; + private static final StandardWorkspace WORKSPACE = new StandardWorkspace().withWorkspaceId(UUID.randomUUID()); + + private ConfigRepository configRepository; + private TrackingClient trackingClient; + private SynchronousSchedulerClient schedulerClient; + + @BeforeEach + void setup() throws IOException, JsonValidationException { + configRepository = mock(ConfigRepository.class); + 
when(configRepository.listStandardWorkspaces(true)).thenReturn(List.of(WORKSPACE)); + + trackingClient = mock(TrackingClient.class); + schedulerClient = mock(SynchronousSchedulerClient.class); + } + + @Test + public void testBackfillSpecSuccessful() throws JsonValidationException, IOException { + final StandardSourceDefinition sourceDef = new StandardSourceDefinition().withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + final StandardDestinationDefinition destDef = new StandardDestinationDefinition().withDockerRepository(DEST_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + + when(configRepository.listStandardSourceDefinitions()).thenReturn(List.of(sourceDef)); + when(configRepository.listStandardDestinationDefinitions()).thenReturn(List.of(destDef)); + + final ConnectorSpecification sourceSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://source.org")); + final ConnectorSpecification destSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://dest.org")); + + final SynchronousResponse successfulSourceResponse = new SynchronousResponse<>( + sourceSpec, + mockJobMetadata(true)); + final SynchronousResponse successfulDestResponse = new SynchronousResponse<>( + destSpec, + mockJobMetadata(true)); + + final SynchronousSchedulerClient schedulerClient = mock(SynchronousSchedulerClient.class); + when(schedulerClient.createGetSpecJob(SOURCE_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(successfulSourceResponse); + when(schedulerClient.createGetSpecJob(DEST_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(successfulDestResponse); + + ServerApp.migrateAllDefinitionsToContainSpec(configRepository, schedulerClient, trackingClient, WorkerEnvironment.DOCKER, mock(LogConfigs.class)); + + final StandardSourceDefinition expectedSourceDef = Jsons.clone(sourceDef).withSpec(sourceSpec); + final StandardDestinationDefinition expectedDestDef = Jsons.clone(destDef).withSpec(destSpec); + verify(configRepository, times(1)).writeStandardSourceDefinition(expectedSourceDef); + verify(configRepository, times(1)).writeStandardDestinationDefinition(expectedDestDef); + } + + @Test + public void testBackfillSpecFailure() throws JsonValidationException, IOException { + final StandardSourceDefinition sourceDef = new StandardSourceDefinition().withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + final StandardDestinationDefinition destDef = new StandardDestinationDefinition().withDockerRepository(DEST_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG); + + when(configRepository.listStandardSourceDefinitions()).thenReturn(List.of(sourceDef)); + when(configRepository.listStandardDestinationDefinitions()).thenReturn(List.of(destDef)); + + final ConnectorSpecification sourceSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://source.org")); + final ConnectorSpecification destSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://dest.org")); + + final SynchronousResponse failureSourceResponse = new SynchronousResponse<>( + sourceSpec, + mockJobMetadata(false)); + final SynchronousResponse failureDestResponse = new SynchronousResponse<>( + destSpec, + mockJobMetadata(false)); + + when(schedulerClient.createGetSpecJob(SOURCE_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(failureSourceResponse); + when(schedulerClient.createGetSpecJob(DEST_DOCKER_REPO + ":" + DOCKER_IMAGE_TAG)).thenReturn(failureDestResponse); + + 
ServerApp.migrateAllDefinitionsToContainSpec(configRepository, schedulerClient, trackingClient, WorkerEnvironment.DOCKER, mock(LogConfigs.class)); + + verify(configRepository, never()).writeStandardSourceDefinition(any()); + verify(configRepository, never()).writeStandardDestinationDefinition(any()); + + verify(trackingClient, times(2)).track(eq(WORKSPACE.getWorkspaceId()), eq(FAILED_SPEC_BACKFILL_ACTION), anyMap()); + } + + @Test + public void testSpecAlreadyExists() throws JsonValidationException, IOException { + final ConnectorSpecification sourceSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://source.org")); + final ConnectorSpecification destSpec = new ConnectorSpecification().withDocumentationUrl(URI.create("http://dest.org")); + final StandardSourceDefinition sourceDef = new StandardSourceDefinition().withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG).withSpec(sourceSpec); + final StandardDestinationDefinition destDef = new StandardDestinationDefinition().withDockerRepository(DEST_DOCKER_REPO) + .withDockerImageTag(DOCKER_IMAGE_TAG).withSpec(destSpec); + + when(configRepository.listStandardSourceDefinitions()).thenReturn(List.of(sourceDef)); + when(configRepository.listStandardDestinationDefinitions()).thenReturn(List.of(destDef)); + + ServerApp.migrateAllDefinitionsToContainSpec( + configRepository, + mock(SynchronousSchedulerClient.class), + trackingClient, + WorkerEnvironment.DOCKER, + mock(LogConfigs.class)); + + verify(schedulerClient, never()).createGetSpecJob(any()); + verify(configRepository, never()).writeStandardSourceDefinition(any()); + verify(configRepository, never()).writeStandardDestinationDefinition(any()); + } + + private SynchronousJobMetadata mockJobMetadata(final boolean succeeded) { + final long now = Instant.now().toEpochMilli(); + return new SynchronousJobMetadata( + UUID.randomUUID(), + ConfigType.GET_SPEC, + UUID.randomUUID(), + now, + now, + succeeded, + Path.of("path", "to", "logs")); + } + +} From b5af3f1d561e486c17083ff207973d67113b7a40 Mon Sep 17 00:00:00 2001 From: Charles Date: Fri, 5 Nov 2021 18:17:34 -0700 Subject: [PATCH 69/83] have docker-compose.build.yaml to use new docker context (#7696) --- docker-compose.build-m1.yaml | 14 +++++++------- docker-compose.build.yaml | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docker-compose.build-m1.yaml b/docker-compose.build-m1.yaml index 71e8dc4f6d5e1..142a85c9efe57 100644 --- a/docker-compose.build-m1.yaml +++ b/docker-compose.build-m1.yaml @@ -14,7 +14,7 @@ services: image: airbyte/init:${VERSION} build: dockerfile: Dockerfile - context: airbyte-config/init + context: airbyte-config/init/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} db: @@ -22,7 +22,7 @@ services: image: airbyte/db:${VERSION} build: dockerfile: Dockerfile - context: airbyte-db/lib + context: airbyte-db/lib/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} scheduler: @@ -32,7 +32,7 @@ services: dockerfile: Dockerfile args: JDK_VERSION: ${JDK_VERSION} - context: airbyte-scheduler/app + context: airbyte-scheduler/app/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} worker: @@ -43,7 +43,7 @@ services: args: ARCH: ${DOCKER_BUILD_ARCH} JDK_VERSION: ${JDK_VERSION} - context: airbyte-workers + context: airbyte-workers/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} server: @@ -53,7 +53,7 @@ services: dockerfile: Dockerfile args: JDK_VERSION: ${JDK_VERSION} - context: airbyte-server + context: 
airbyte-server/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} webapp: @@ -61,7 +61,7 @@ services: image: airbyte/webapp:${VERSION} build: dockerfile: Dockerfile - context: airbyte-webapp + context: airbyte-webapp/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} migration: @@ -71,6 +71,6 @@ services: dockerfile: Dockerfile args: JDK_VERSION: ${JDK_VERSION} - context: airbyte-migration + context: airbyte-migration/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} diff --git a/docker-compose.build.yaml b/docker-compose.build.yaml index bce29f9bbb074..de1a6cfcc613b 100644 --- a/docker-compose.build.yaml +++ b/docker-compose.build.yaml @@ -5,48 +5,48 @@ services: image: airbyte/init:${VERSION} build: dockerfile: Dockerfile - context: airbyte-config/init + context: airbyte-config/init/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} db: image: airbyte/db:${VERSION} build: dockerfile: Dockerfile - context: airbyte-db/lib + context: airbyte-db/lib/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} scheduler: image: airbyte/scheduler:${VERSION} build: dockerfile: Dockerfile - context: airbyte-scheduler/app + context: airbyte-scheduler/app/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} worker: image: airbyte/worker:${VERSION} build: dockerfile: Dockerfile - context: airbyte-workers + context: airbyte-workers/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} server: image: airbyte/server:${VERSION} build: dockerfile: Dockerfile - context: airbyte-server + context: airbyte-server/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} webapp: image: airbyte/webapp:${VERSION} build: dockerfile: Dockerfile - context: airbyte-webapp + context: airbyte-webapp/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} migration: image: airbyte/migration:${VERSION} build: dockerfile: Dockerfile - context: airbyte-migration + context: airbyte-migration/build/docker labels: io.airbyte.git-revision: ${GIT_REVISION} From d84f33a626da85d8cf3011fe3f1524249cfe5087 Mon Sep 17 00:00:00 2001 From: Charles Date: Fri, 5 Nov 2021 18:48:42 -0700 Subject: [PATCH 70/83] Revert "have docker-compose.build.yaml to use new docker context (#7696)" (#7697) This reverts commit b5af3f1d561e486c17083ff207973d67113b7a40. 
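For readers tracking the back-and-forth between #7696 and the reverts that follow, here is a minimal sketch of the two build flows involved. It is assembled from the `gradle.yml`, changelog, and `release_version.sh` changes in the surrounding patches, not a verified recipe; flags and environment variables may differ in a real checkout.

```bash
# Sketch only — commands lifted from the CI/docs changes in these patches.

# Flow introduced by #7696 (reverted here): Gradle assembles each module's
# Docker context under <module>/build/docker, and docker-compose builds the
# images from those generated directories.
SUB_BUILD=PLATFORM ./gradlew --no-daemon assemble
VERSION=dev docker-compose -f docker-compose.build.yaml build

# Flow restored by this revert and the next one (#7698): the composeBuild
# Gradle task drives docker-compose against the module directories directly.
SUB_BUILD=PLATFORM ./gradlew --no-daemon composeBuild
VERSION=dev docker compose up
```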
--- docker-compose.build-m1.yaml | 14 +++++++------- docker-compose.build.yaml | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docker-compose.build-m1.yaml b/docker-compose.build-m1.yaml index 142a85c9efe57..71e8dc4f6d5e1 100644 --- a/docker-compose.build-m1.yaml +++ b/docker-compose.build-m1.yaml @@ -14,7 +14,7 @@ services: image: airbyte/init:${VERSION} build: dockerfile: Dockerfile - context: airbyte-config/init/build/docker + context: airbyte-config/init labels: io.airbyte.git-revision: ${GIT_REVISION} db: @@ -22,7 +22,7 @@ services: image: airbyte/db:${VERSION} build: dockerfile: Dockerfile - context: airbyte-db/lib/build/docker + context: airbyte-db/lib labels: io.airbyte.git-revision: ${GIT_REVISION} scheduler: @@ -32,7 +32,7 @@ services: dockerfile: Dockerfile args: JDK_VERSION: ${JDK_VERSION} - context: airbyte-scheduler/app/build/docker + context: airbyte-scheduler/app labels: io.airbyte.git-revision: ${GIT_REVISION} worker: @@ -43,7 +43,7 @@ services: args: ARCH: ${DOCKER_BUILD_ARCH} JDK_VERSION: ${JDK_VERSION} - context: airbyte-workers/build/docker + context: airbyte-workers labels: io.airbyte.git-revision: ${GIT_REVISION} server: @@ -53,7 +53,7 @@ services: dockerfile: Dockerfile args: JDK_VERSION: ${JDK_VERSION} - context: airbyte-server/build/docker + context: airbyte-server labels: io.airbyte.git-revision: ${GIT_REVISION} webapp: @@ -61,7 +61,7 @@ services: image: airbyte/webapp:${VERSION} build: dockerfile: Dockerfile - context: airbyte-webapp/build/docker + context: airbyte-webapp labels: io.airbyte.git-revision: ${GIT_REVISION} migration: @@ -71,6 +71,6 @@ services: dockerfile: Dockerfile args: JDK_VERSION: ${JDK_VERSION} - context: airbyte-migration/build/docker + context: airbyte-migration labels: io.airbyte.git-revision: ${GIT_REVISION} diff --git a/docker-compose.build.yaml b/docker-compose.build.yaml index de1a6cfcc613b..bce29f9bbb074 100644 --- a/docker-compose.build.yaml +++ b/docker-compose.build.yaml @@ -5,48 +5,48 @@ services: image: airbyte/init:${VERSION} build: dockerfile: Dockerfile - context: airbyte-config/init/build/docker + context: airbyte-config/init labels: io.airbyte.git-revision: ${GIT_REVISION} db: image: airbyte/db:${VERSION} build: dockerfile: Dockerfile - context: airbyte-db/lib/build/docker + context: airbyte-db/lib labels: io.airbyte.git-revision: ${GIT_REVISION} scheduler: image: airbyte/scheduler:${VERSION} build: dockerfile: Dockerfile - context: airbyte-scheduler/app/build/docker + context: airbyte-scheduler/app labels: io.airbyte.git-revision: ${GIT_REVISION} worker: image: airbyte/worker:${VERSION} build: dockerfile: Dockerfile - context: airbyte-workers/build/docker + context: airbyte-workers labels: io.airbyte.git-revision: ${GIT_REVISION} server: image: airbyte/server:${VERSION} build: dockerfile: Dockerfile - context: airbyte-server/build/docker + context: airbyte-server labels: io.airbyte.git-revision: ${GIT_REVISION} webapp: image: airbyte/webapp:${VERSION} build: dockerfile: Dockerfile - context: airbyte-webapp/build/docker + context: airbyte-webapp labels: io.airbyte.git-revision: ${GIT_REVISION} migration: image: airbyte/migration:${VERSION} build: dockerfile: Dockerfile - context: airbyte-migration/build/docker + context: airbyte-migration labels: io.airbyte.git-revision: ${GIT_REVISION} From 797d11a8d7ad1f14ed0f2436f6584ca9664dd2bd Mon Sep 17 00:00:00 2001 From: Benoit Moriceau Date: Fri, 5 Nov 2021 18:55:15 -0700 Subject: [PATCH 71/83] Revert "Bmoric/remove docker compose for build 
(#7500)" (#7698) This reverts commit 4e17fa21a5a4571cf25407e2423c8b59af615cca. --- .bumpversion.cfg | 2 - .github/workflows/gradle.yml | 4 +- airbyte-cli/build.gradle | 6 +- airbyte-config/init/.dockerignore | 3 + airbyte-config/init/Dockerfile | 2 +- airbyte-config/init/build.gradle | 11 ---- airbyte-db/lib/.dockerignore | 2 + airbyte-db/lib/Dockerfile | 2 +- airbyte-db/lib/build.gradle | 13 ---- airbyte-migration/.dockerignore | 3 + airbyte-migration/Dockerfile | 2 +- airbyte-migration/build.gradle | 13 ---- .../java/io/airbyte/oauth/BaseOAuthFlow.java | 1 + airbyte-scheduler/app/.dockerignore | 3 + airbyte-scheduler/app/Dockerfile | 2 +- airbyte-scheduler/app/build.gradle | 14 ----- airbyte-server/.dockerignore | 3 + airbyte-server/Dockerfile | 2 +- airbyte-server/build.gradle | 14 ----- airbyte-webapp/.dockerignore | 4 ++ airbyte-webapp/Dockerfile | 8 ++- airbyte-webapp/build.gradle | 31 +--------- airbyte-workers/Dockerfile | 2 +- airbyte-workers/build.gradle | 14 ----- build.gradle | 59 ++++++++++++------- docs/SUMMARY.md | 1 - .../developing-on-docker.md | 43 -------------- docs/project-overview/changelog/README.md | 3 +- .../acceptance_test_kube_gke.sh | 2 +- tools/bin/release_version.sh | 2 +- 30 files changed, 77 insertions(+), 194 deletions(-) create mode 100644 airbyte-config/init/.dockerignore create mode 100644 airbyte-db/lib/.dockerignore create mode 100644 airbyte-migration/.dockerignore create mode 100644 airbyte-scheduler/app/.dockerignore create mode 100644 airbyte-server/.dockerignore create mode 100644 airbyte-webapp/.dockerignore delete mode 100644 docs/contributing-to-airbyte/developing-on-docker.md diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e9bda52d54a98..5b463ca898bed 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -10,8 +10,6 @@ serialize = [bumpversion:file:.env] -[bumpversion:file:airbyte-migration/Dockerfile] - [bumpversion:file:airbyte-server/Dockerfile] [bumpversion:file:airbyte-workers/Dockerfile] diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 174a134cca29b..87ff147195de7 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -339,7 +339,7 @@ jobs: EOF - name: Build Platform Docker Images - run: SUB_BUILD=PLATFORM ./gradlew --no-daemon assemble --scan + run: SUB_BUILD=PLATFORM ./gradlew --no-daemon composebuild --scan - name: Run End-to-End Frontend Tests run: ./tools/bin/e2e_test.sh @@ -457,7 +457,7 @@ jobs: HOME: /home/runner - name: Build Platform Docker Images - run: SUB_BUILD=PLATFORM ./gradlew assemble --scan + run: SUB_BUILD=PLATFORM ./gradlew composeBuild --scan - name: Run Kubernetes End-to-End Acceptance Tests env: diff --git a/airbyte-cli/build.gradle b/airbyte-cli/build.gradle index 4cccd9d4f4018..3388338f60a7a 100644 --- a/airbyte-cli/build.gradle +++ b/airbyte-cli/build.gradle @@ -1,3 +1,3 @@ -Task dockerBuildTask = getDockerBuildTask("cli", "$project.projectDir") -dockerBuildTask.dependsOn(copyDocker) -assemble.dependsOn(dockerBuildTask) +plugins { + id "airbyte-docker" +} diff --git a/airbyte-config/init/.dockerignore b/airbyte-config/init/.dockerignore new file mode 100644 index 0000000000000..5ad9d43099b67 --- /dev/null +++ b/airbyte-config/init/.dockerignore @@ -0,0 +1,3 @@ +* +!src +!scripts diff --git a/airbyte-config/init/Dockerfile b/airbyte-config/init/Dockerfile index c62cb080d822f..89bd5491e53a6 100644 --- a/airbyte-config/init/Dockerfile +++ b/airbyte-config/init/Dockerfile @@ -5,4 +5,4 @@ WORKDIR /app # the sole purpose of this image is to seed the data 
volume with the default data # that the app should have when it is first installed. -COPY bin/scripts scripts +COPY scripts scripts diff --git a/airbyte-config/init/build.gradle b/airbyte-config/init/build.gradle index 05bfa389043cd..c7117fd16ea78 100644 --- a/airbyte-config/init/build.gradle +++ b/airbyte-config/init/build.gradle @@ -11,14 +11,3 @@ dependencies { implementation project(':airbyte-commons-docker') implementation project(':airbyte-json-validation') } - -task copyScripts(type: Copy) { - dependsOn copyDocker - - from('scripts') - into 'build/docker/bin/scripts' -} - -Task dockerBuildTask = getDockerBuildTask("init", "$project.projectDir") -dockerBuildTask.dependsOn(copyScripts) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-db/lib/.dockerignore b/airbyte-db/lib/.dockerignore new file mode 100644 index 0000000000000..7a1eba35d5be1 --- /dev/null +++ b/airbyte-db/lib/.dockerignore @@ -0,0 +1,2 @@ +* +!src diff --git a/airbyte-db/lib/Dockerfile b/airbyte-db/lib/Dockerfile index a9f42ce521ba1..a2feafc85de83 100644 --- a/airbyte-db/lib/Dockerfile +++ b/airbyte-db/lib/Dockerfile @@ -1,3 +1,3 @@ FROM postgres:13-alpine -COPY bin/init.sql /docker-entrypoint-initdb.d/000_init.sql +COPY src/main/resources/init.sql /docker-entrypoint-initdb.d/000_init.sql diff --git a/airbyte-db/lib/build.gradle b/airbyte-db/lib/build.gradle index 5be6ca9a1e8e9..1d3d5dd2d3312 100644 --- a/airbyte-db/lib/build.gradle +++ b/airbyte-db/lib/build.gradle @@ -69,16 +69,3 @@ task(dumpJobsSchema, dependsOn: 'classes', type: JavaExec) { classpath = sourceSets.main.runtimeClasspath args 'jobs', 'dump_schema' } - -task copyInitSql(type: Copy) { - dependsOn copyDocker - - from('src/main/resources') { - include 'init.sql' - } - into 'build/docker/bin' -} - -Task dockerBuildTask = getDockerBuildTask("db", "$project.projectDir") -dockerBuildTask.dependsOn(copyInitSql) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-migration/.dockerignore b/airbyte-migration/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-migration/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-migration/Dockerfile b/airbyte-migration/Dockerfile index 1fe50c4560c60..8a657e07ee5bb 100644 --- a/airbyte-migration/Dockerfile +++ b/airbyte-migration/Dockerfile @@ -6,7 +6,7 @@ ENV APPLICATION airbyte-migration WORKDIR /app # Move and run scheduler -COPY bin/${APPLICATION}-0.30.31-alpha.tar ${APPLICATION}.tar +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 diff --git a/airbyte-migration/build.gradle b/airbyte-migration/build.gradle index 1a9504dec522b..ac752cf739c7d 100644 --- a/airbyte-migration/build.gradle +++ b/airbyte-migration/build.gradle @@ -15,16 +15,3 @@ application { mainClass = 'io.airbyte.migrate.MigrationRunner' } -task copyGeneratedTar(type: Copy) { - dependsOn distTar - dependsOn copyDocker - - from('build/distributions') { - include 'airbyte-migration-*.tar' - } - into 'build/docker/bin' -} - -Task dockerBuildTask = getDockerBuildTask("migration", "$project.projectDir") -dockerBuildTask.dependsOn(copyGeneratedTar) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index 7e03902ce0788..917711f944a86 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -25,6 
+25,7 @@ import java.util.function.Function; import java.util.function.Supplier; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/airbyte-scheduler/app/.dockerignore b/airbyte-scheduler/app/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-scheduler/app/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index c603b3d9e8034..f4cb9b7d537ad 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.30.31-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/build.gradle b/airbyte-scheduler/app/build.gradle index 371137316379c..d40206efb3ae9 100644 --- a/airbyte-scheduler/app/build.gradle +++ b/airbyte-scheduler/app/build.gradle @@ -57,17 +57,3 @@ run { environment "TEMPORAL_HOST", "localhost:7233" } - -task copyGeneratedTar(type: Copy) { - dependsOn copyDocker - dependsOn distTar - - from('build/distributions') { - include 'airbyte-scheduler-*.tar' - } - into 'build/docker/bin' -} - -Task dockerBuildTask = getDockerBuildTask("scheduler", "$project.projectDir") -dockerBuildTask.dependsOn(copyGeneratedTar) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-server/.dockerignore b/airbyte-server/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-server/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 56ccd6192bcde..ec7aefe8c4c11 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.30.31-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/build.gradle b/airbyte-server/build.gradle index a5ce58eb79463..a38db0edc5dbe 100644 --- a/airbyte-server/build.gradle +++ b/airbyte-server/build.gradle @@ -127,17 +127,3 @@ run { environment "AIRBYTE_ROLE", System.getenv('AIRBYTE_ROLE') environment "TEMPORAL_HOST", "localhost:7233" } - -task copyGeneratedTar(type: Copy) { - dependsOn copyDocker - dependsOn distTar - - from('build/distributions') { - include 'airbyte-server-*.tar' - } - into 'build/docker/bin' -} - -Task dockerBuildTask = getDockerBuildTask("server", "$project.projectDir") -dockerBuildTask.dependsOn(copyGeneratedTar) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-webapp/.dockerignore b/airbyte-webapp/.dockerignore new file mode 100644 index 0000000000000..b284b9daeb6c5 --- /dev/null +++ b/airbyte-webapp/.dockerignore @@ -0,0 +1,4 @@ +* +!Dockerfile +!build +!nginx diff --git a/airbyte-webapp/Dockerfile b/airbyte-webapp/Dockerfile index 1eead5631b6fb..e1054ff5154fa 100644 --- a/airbyte-webapp/Dockerfile +++ b/airbyte-webapp/Dockerfile @@ -2,6 +2,8 @@ FROM nginx:1.19-alpine as webapp EXPOSE 80 -COPY bin/docs docs/ -COPY bin/build /usr/share/nginx/html -COPY 
bin/nginx/default.conf.template /etc/nginx/templates/default.conf.template +COPY build/docs docs/ +# docs get copied twice because npm gradle plugin ignores output dir. +COPY build /usr/share/nginx/html +RUN rm -rf /usr/share/nginx/html/docs +COPY nginx/default.conf.template /etc/nginx/templates/default.conf.template diff --git a/airbyte-webapp/build.gradle b/airbyte-webapp/build.gradle index 886827b844947..1ea5a0ed0dbc8 100644 --- a/airbyte-webapp/build.gradle +++ b/airbyte-webapp/build.gradle @@ -32,38 +32,11 @@ task test(type: NpmTask) { assemble.dependsOn npm_run_build build.finalizedBy test -task copyBuild(type: Copy) { - dependsOn copyDocker - - from "${project.projectDir}/build" - into "build/docker/bin/build" - exclude ".docker" - exclude "docker" -} - task copyDocs(type: Copy) { - dependsOn copyDocker - - from "${project.rootProject.projectDir}/docs/integrations" - into "build/docker/bin/docs/integrations" + from "${System.getProperty("user.dir")}/docs/integrations" + into "${buildDir}/docs/integrations" duplicatesStrategy DuplicatesStrategy.INCLUDE } -task copyNginx(type: Copy) { - dependsOn copyDocker - - from "${project.projectDir}/nginx" - into "build/docker/bin/nginx" -} - -copyBuild.dependsOn npm_run_build -copyNginx.dependsOn npm_run_build copyDocs.dependsOn npm_run_build assemble.dependsOn copyDocs -copyDocker.dependsOn(npm_run_build) - -Task dockerBuildTask = getDockerBuildTask("webapp", "$project.projectDir") -dockerBuildTask.dependsOn(copyBuild) -dockerBuildTask.dependsOn(copyNginx) -dockerBuildTask.dependsOn(copyDocs) -assemble.dependsOn(dockerBuildTask) diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 3e06d45c6bd92..09cbbe5c89907 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.30.31-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app # wait for upstream dependencies to become available before starting server ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-workers/build.gradle b/airbyte-workers/build.gradle index f461116ce2d70..b1a2f9a38afd5 100644 --- a/airbyte-workers/build.gradle +++ b/airbyte-workers/build.gradle @@ -62,17 +62,3 @@ application { mainClass = mainClassName applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] } - -task copyGeneratedTar(type: Copy) { - dependsOn copyDocker - dependsOn distTar - - from('build/distributions') { - include 'airbyte-workers-*.tar' - } - into 'build/docker/bin' -} - -Task dockerBuildTask = getDockerBuildTask("worker", "$project.projectDir") -dockerBuildTask.dependsOn(copyGeneratedTar) -assemble.dependsOn(dockerBuildTask) diff --git a/build.gradle b/build.gradle index 03433bf46f525..e45384dd0a55a 100644 --- a/build.gradle +++ b/build.gradle @@ -1,16 +1,3 @@ -import com.bmuschko.gradle.docker.tasks.image.DockerBuildImage - -buildscript { - repositories { - maven { - url "https://plugins.gradle.org/m2/" - } - } - dependencies { - classpath 'com.bmuschko:gradle-docker-plugin:7.1.0' - } -} - plugins { id 'base' id 'pmd' @@ -146,19 +133,26 @@ def Task getDockerBuildTask(String artifactName, String projectDir) { } allprojects { - apply plugin: 'com.bmuschko.docker-remote-api' - - task copyDocker(type: Copy) { - delete "build/docker" + apply plugin: 'base' - from "${project.projectDir}/Dockerfile" - into "build/docker/" + afterEvaluate { project -> + def composeDeps = [ + 
":airbyte-config:init", + ":airbyte-db:lib", + ":airbyte-migration", + ":airbyte-scheduler:app", + ":airbyte-workers", + ":airbyte-server", + ":airbyte-webapp", + ].toSet().asImmutable() + + if (project.getPath() in composeDeps) { + composeBuild.dependsOn(project.getPath() + ':assemble') + } } } allprojects { - apply plugin: 'base' - // by default gradle uses directory as the project name. That works very well in a single project environment but // projects clobber each other in an environments with subprojects when projects are in directories named identically. def sub = rootDir.relativePath(projectDir.parentFile).replace('/', '.') @@ -260,7 +254,6 @@ subprojects { testImplementation 'org.junit.jupiter:junit-jupiter-params:5.7.2' testImplementation 'org.mockito:mockito-junit-jupiter:3.12.4' testImplementation 'org.assertj:assertj-core:3.21.0' - } tasks.withType(Tar) { @@ -272,6 +265,28 @@ subprojects { } } +task composeBuild { + def buildTag = System.getenv('VERSION') ?: 'dev' + def buildPlatform = System.getenv('DOCKER_BUILD_PLATFORM') ?: 'linux/amd64' + def buildArch = System.getenv('DOCKER_BUILD_ARCH') ?: 'amd64' + def jdkVersion = System.getenv('JDK_VERSION') ?: '14.0.2' + def dockerComposeFile = buildArch == 'arm64' ? 'docker-compose.build-m1.yaml' : 'docker-compose.build.yaml' + doFirst { + exec { + workingDir rootDir + commandLine 'docker-compose', '-f', dockerComposeFile, 'build', '--parallel', '--quiet' + environment 'VERSION', buildTag + environment 'DOCKER_BUILD_PLATFORM', buildPlatform + environment 'DOCKER_BUILD_ARCH', buildArch + environment 'JDK_VERSION', jdkVersion + } + } +} + +if (!System.getenv().containsKey("SUB_BUILD") || System.getenv().get("SUB_BUILD") == "PLATFORM") { + build.dependsOn(composeBuild) +} + task('generate') { dependsOn subprojects.collect { it.getTasksByName('generateProtocolClassFiles', true) } dependsOn subprojects.collect { it.getTasksByName('generateJsonSchema2Pojo', true) } diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 9ad3de71305a3..f3f371e432011 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -199,7 +199,6 @@ * [Contributing to Airbyte](contributing-to-airbyte/README.md) * [Code of Conduct](contributing-to-airbyte/code-of-conduct.md) * [Developing Locally](contributing-to-airbyte/developing-locally.md) - * [Developing on Docker](contributing-to-airbyte/developing-on-docker.md) * [Developing on Kubernetes](contributing-to-airbyte/developing-on-kubernetes.md) * [Monorepo Python Development](contributing-to-airbyte/monorepo-python-development.md) * [Code Style](contributing-to-airbyte/code-style.md) diff --git a/docs/contributing-to-airbyte/developing-on-docker.md b/docs/contributing-to-airbyte/developing-on-docker.md deleted file mode 100644 index 34f225c8b0172..0000000000000 --- a/docs/contributing-to-airbyte/developing-on-docker.md +++ /dev/null @@ -1,43 +0,0 @@ -# Developing on docker - -## Incrementality - -The docker build is fully incremental for the platform build, which means that it will only build an image if it is needed. We need to keep it that -way. -A task generator, `getDockerBuildTask`, is available for building a docker image for any given module. Behind the scene, it will generate a -task which will run the build of a docker image in a specific folder. The goal is to make sure that we have an isolated -context which helps with incrementality. All files that need to be present in the docker image will need to be copy into this folder. 
The generate -method takes 2 arguments: -- The image name, for example if `foo` is given as an image name, the image `airbyte/foo` will be created -- The project directory folder. It is needed because the `getDockerBuildTask` is declared in the rootProject - -## Adding a new docker build - -Once you have a `Dockerfile`, generating the docker image is done in the following way: -- specify the artifact name and the project directory, -- make sure that the Dockerfile is properly copied to the docker context dir before building the image -- make the build docker task to depend on the `assemble` task. - -For example: -```groovy -Task dockerBuildTask = getDockerBuildTask("cli", project.projectDir) -dockerBuildTask.dependsOn(copyDocker) -assemble.dependsOn(dockerBuildTask) -``` - -If you need to add files in your image you need to copy them in `build/docker/bin` first. The need to happen after the `copyDocker` task. -The `copyDocker` task clean up the `build/docker` folder as a first step. - -For example: -```groovy -task copyScripts(type: Copy) { - dependsOn copyDocker - - from('scripts') - into 'build/docker/bin/scripts' -} - -Task dockerBuildTask = getDockerBuildTask("init", project.projectDir) -dockerBuildTask.dependsOn(copyScripts) -assemble.dependsOn(dockerBuildTask) -``` diff --git a/docs/project-overview/changelog/README.md b/docs/project-overview/changelog/README.md index 6729332c0004b..86c8266c239b0 100644 --- a/docs/project-overview/changelog/README.md +++ b/docs/project-overview/changelog/README.md @@ -257,8 +257,7 @@ Airbyte is comprised of 2 parts: The "production" version of Airbyte is the version of the app specified in `.env`. With each production release, we update the version in the `.env` file. This version will always be available for download on DockerHub. It is the version of the app that runs when a user runs `docker-compose up`. -The "development" version of Airbyte is the head of master branch. It is the version of the app that runs when a user runs `./gradlew build && -VERSION=dev docker compose up`. +The "development" version of Airbyte is the head of master branch. It is the version of the app that runs when a user runs `./gradlew composeBuild && VERSION=dev docker compose up`. ### Production Release Schedule diff --git a/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh b/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh index 85ee8e4c517bd..f796e94bb5fa8 100755 --- a/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh +++ b/tools/bin/gke-kube-acceptance-test/acceptance_test_kube_gke.sh @@ -14,7 +14,7 @@ TAG=$(openssl rand -hex 12) echo "Tag" $TAG docker login -u airbytebot -p $DOCKER_PASSWORD -VERSION=$TAG ./gradlew build +VERSION=$TAG ./gradlew composeBuild VERSION=$TAG docker-compose -f docker-compose.build.yaml push # For running on Mac diff --git a/tools/bin/release_version.sh b/tools/bin/release_version.sh index 1ae49cddacc13..5cc3d245fcd1c 100755 --- a/tools/bin/release_version.sh +++ b/tools/bin/release_version.sh @@ -38,7 +38,7 @@ GIT_REVISION=$(git rev-parse HEAD) echo "Bumped version from ${PREV_VERSION} to ${NEW_VERSION}" echo "Building and publishing version $NEW_VERSION for git revision $GIT_REVISION..." 
-SUB_BUILD=PLATFORM ./gradlew clean build +SUB_BUILD=PLATFORM ./gradlew clean composeBuild SUB_BUILD=PLATFORM ./gradlew publish VERSION=$NEW_VERSION GIT_REVISION=$GIT_REVISION docker-compose -f docker-compose.build.yaml build VERSION=$NEW_VERSION GIT_REVISION=$GIT_REVISION docker-compose -f docker-compose.build.yaml push From 76cb8448c848b05f5395210a60b5f286aa71d56b Mon Sep 17 00:00:00 2001 From: Charles Date: Fri, 5 Nov 2021 19:00:42 -0700 Subject: [PATCH 72/83] format (#7699) --- airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java index 917711f944a86..7e03902ce0788 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/BaseOAuthFlow.java @@ -25,7 +25,6 @@ import java.util.function.Function; import java.util.function.Supplier; import org.apache.commons.lang3.RandomStringUtils; -import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; From 56185e2f62f8a53fffc3a3b8ffa9bcd8dd7015ac Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 5 Nov 2021 23:07:20 -0700 Subject: [PATCH 73/83] Bump Airbyte version from 0.30.31-alpha to 0.30.32-alpha (#7700) * Bump Airbyte version from 0.30.31-alpha to 0.30.32-alpha * remove * fix format Co-authored-by: cgardens Co-authored-by: cgardens --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- .../src/main/java/io/airbyte/server/ServerApp.java | 12 ++++++------ airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 16 files changed, 38 insertions(+), 38 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5b463ca898bed..fa2ec3466e659 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.31-alpha +current_version = 0.30.32-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
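This version-bump commit and the one that follows (#7700, #7701) are the mechanical output of the release flow in `tools/bin/release_version.sh`, shown in the revert patch above. A rough sketch, with placeholders where the script computes values itself:

```bash
# Sketch of the post-revert release sequence (see tools/bin/release_version.sh above).
# NEW_VERSION and GIT_REVISION are placeholders derived by the script; the files
# rewritten with the new version string (e.g. .env and the scheduler/server/worker
# Dockerfiles) are the ones listed in .bumpversion.cfg.
SUB_BUILD=PLATFORM ./gradlew clean composeBuild
SUB_BUILD=PLATFORM ./gradlew publish
VERSION=$NEW_VERSION GIT_REVISION=$GIT_REVISION docker-compose -f docker-compose.build.yaml build
VERSION=$NEW_VERSION GIT_REVISION=$GIT_REVISION docker-compose -f docker-compose.build.yaml push
```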
diff --git a/.env b/.env index ece63bb799b42..266e27d177d7e 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.31-alpha +VERSION=0.30.32-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index f4cb9b7d537ad..bef947fddd6f9 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.32-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.32-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index ec7aefe8c4c11..3256a05871367 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.32-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.32-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index b05fb8b22451f..ef9fac5c5ca34 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -250,12 +250,12 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con // todo (lmossman) - this will only exist temporarily to ensure all definitions contain specs. 
It // will be removed after the faux major version bump - migrateAllDefinitionsToContainSpec( - configRepository, - cachingSchedulerClient, - trackingClient, - configs.getWorkerEnvironment(), - configs.getLogConfigs()); + // migrateAllDefinitionsToContainSpec( + // configRepository, + // cachingSchedulerClient, + // trackingClient, + // configs.getWorkerEnvironment(), + // configs.getLogConfigs()); return apiFactory.create( schedulerJobClient, diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 1c99dcdcf0498..760e18114b90d 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.31-alpha", + "version": "0.30.32-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 814b84907962c..d90969f3ffbd8 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.31-alpha", + "version": "0.30.32-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 09cbbe5c89907..3fb1f07877fdc 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.31-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.32-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.31-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.32-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index fd030c9bf6ffc..caf1e86d3af4c 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.31-alpha" +appVersion: "0.30.32-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 42c1706c94713..bc2df64dfbc49 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. 
| `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.31-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 051a5b3b61300..dcc0d44424394 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.31-alpha + tag: 0.30.32-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.31-alpha + tag: 0.30.32-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.31-alpha + tag: 0.30.32-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.31-alpha + tag: 0.30.32-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 2b01414738a4b..f9df8ddd1fbd6 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. 
your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. ```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.31-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.32-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 012c2866c7644..d6459f48f3872 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.31-alpha +AIRBYTE_VERSION=0.30.32-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 6a7c6f06e0050..df7fafffacfe1 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/scheduler - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/server - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/webapp - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/worker - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 012c2866c7644..d6459f48f3872 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.31-alpha +AIRBYTE_VERSION=0.30.32-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index 534696d1ad17f..f23b29647c8f0 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/scheduler - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/server - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/webapp - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: airbyte/worker - newTag: 0.30.31-alpha + newTag: 0.30.32-alpha - name: temporalio/auto-setup newTag: 1.7.0 From d00c2288e9bde7183088c8b6bed71ffc88eb0348 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Fri, 5 Nov 2021 23:52:08 -0700 Subject: [PATCH 74/83] Bump Airbyte version from 0.30.32-alpha to 0.30.33-alpha (#7701) Co-authored-by: cgardens --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 
deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index fa2ec3466e659..4b3e869a65b44 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.32-alpha +current_version = 0.30.33-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 266e27d177d7e..02221b52be6e1 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.32-alpha +VERSION=0.30.33-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index bef947fddd6f9..66981aff1c8bd 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.32-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.33-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.32-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.33-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 3256a05871367..2e61e4344cc6a 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.32-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.33-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.32-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.33-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 760e18114b90d..e85a5e1274dee 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.32-alpha", + "version": "0.30.33-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index d90969f3ffbd8..3af45e2759895 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.32-alpha", + "version": "0.30.33-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 3fb1f07877fdc..b00b62515f98f 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.32-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.33-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.32-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.33-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index caf1e86d3af4c..41be22d6d2e78 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. 
# It is recommended to use it with quotes. -appVersion: "0.30.32-alpha" +appVersion: "0.30.33-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index bc2df64dfbc49..f5526deefc749 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.33-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.33-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.33-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.32-alpha` | +| `worker.image.tag` | The airbyte worker image tag. 
Defaults to the chart's AppVersion | `0.30.33-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index dcc0d44424394..ad7f6a7288fd6 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.32-alpha + tag: 0.30.33-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.32-alpha + tag: 0.30.33-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.32-alpha + tag: 0.30.33-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.32-alpha + tag: 0.30.33-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index f9df8ddd1fbd6..512c4f1cddb15 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.32-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.33-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index d6459f48f3872..c84dbc866cb42 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.32-alpha +AIRBYTE_VERSION=0.30.33-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index df7fafffacfe1..5021006aaefd4 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/scheduler - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/server - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/webapp - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/worker - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index d6459f48f3872..c84dbc866cb42 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.32-alpha +AIRBYTE_VERSION=0.30.33-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index f23b29647c8f0..a6a90372ec840 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/scheduler - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/server - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/webapp - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: airbyte/worker - newTag: 0.30.32-alpha + newTag: 0.30.33-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 0e3fd837398e8dfe4c88f2b4370ba69174055556 Mon Sep 17 00:00:00 2001 From: Charles Date: Fri, 5 Nov 2021 23:53:56 -0700 Subject: [PATCH 75/83] bring back spec backfill (#7702) --- .../src/main/java/io/airbyte/server/ServerApp.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java index ef9fac5c5ca34..b05fb8b22451f 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java +++ b/airbyte-server/src/main/java/io/airbyte/server/ServerApp.java @@ -250,12 +250,12 @@ public static ServerRunnable getServer(final ServerFactory apiFactory, final Con // todo (lmossman) - this will only exist temporarily to ensure all definitions contain specs. 
It // will be removed after the faux major version bump - // migrateAllDefinitionsToContainSpec( - // configRepository, - // cachingSchedulerClient, - // trackingClient, - // configs.getWorkerEnvironment(), - // configs.getLogConfigs()); + migrateAllDefinitionsToContainSpec( + configRepository, + cachingSchedulerClient, + trackingClient, + configs.getWorkerEnvironment(), + configs.getLogConfigs()); return apiFactory.create( schedulerJobClient, From 65d1956a9ed446141f871baa46a487a829248cf1 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sat, 6 Nov 2021 11:19:46 -0700 Subject: [PATCH 76/83] Bump Airbyte version from 0.30.33-alpha to 0.30.34-alpha (#7703) Co-authored-by: cgardens --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 2 +- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 8 ++++---- charts/airbyte/values.yaml | 8 ++++---- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 10 +++++----- kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 10 +++++----- 15 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4b3e869a65b44..56541065c0f4f 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.30.33-alpha +current_version = 0.30.34-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index 02221b52be6e1..eb0f60456990c 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ -VERSION=0.30.33-alpha +VERSION=0.30.34-alpha # Airbyte Internal Job Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_USER=docker diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 66981aff1c8bd..9e6c32336682b 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.33-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.34-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.33-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.34-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 2e61e4344cc6a..f68e343973bd1 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD build/distributions/${APPLICATION}-0.30.33-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.34-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.33-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.34-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index e85a5e1274dee..f5419d28e80ea 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.33-alpha", + "version": "0.30.34-alpha", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/airbyte-webapp/package.json 
b/airbyte-webapp/package.json index 3af45e2759895..c581ef0e01f70 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.30.33-alpha", + "version": "0.30.34-alpha", "private": true, "scripts": { "start": "react-scripts start", diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index b00b62515f98f..0f093dd81ec88 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -23,7 +23,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD build/distributions/${APPLICATION}-0.30.33-alpha.tar /app +ADD build/distributions/${APPLICATION}-0.30.34-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.33-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.30.34-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index 41be22d6d2e78..4eaad31fb81c9 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.30.33-alpha" +appVersion: "0.30.34-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index f5526deefc749..fe00d1305495e 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.33-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -56,7 +56,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.33-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -87,7 +87,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.30.33-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -121,7 +121,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.33-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.30.34-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index ad7f6a7288fd6..674a5d1c3a699 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -44,7 +44,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.30.33-alpha + tag: 0.30.34-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -141,7 +141,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.30.33-alpha + tag: 0.30.34-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -248,7 +248,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.30.33-alpha + tag: 0.30.34-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -360,7 +360,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.30.33-alpha + tag: 0.30.34-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 512c4f1cddb15..d25e0889c915a 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -82,7 +82,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.30.33-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.30.34-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index c84dbc866cb42..e7c0e4a4be7a2 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.33-alpha +AIRBYTE_VERSION=0.30.34-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 5021006aaefd4..415e36f44ef1b 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/scheduler - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/server - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/webapp - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/worker - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index c84dbc866cb42..e7c0e4a4be7a2 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.30.33-alpha +AIRBYTE_VERSION=0.30.34-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index a6a90372ec840..0df9c15d4d9c7 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,15 +8,15 @@ bases: images: - name: airbyte/db - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/scheduler - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/server - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/webapp - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: airbyte/worker - newTag: 0.30.33-alpha + newTag: 0.30.34-alpha - name: temporalio/auto-setup newTag: 1.7.0 From 1b08e306974549cdcee7acd141670961a0f18ff6 Mon Sep 17 00:00:00 2001 From: Vladimir remar Date: Sat, 6 Nov 2021 20:03:55 +0100 Subject: [PATCH 77/83] :tada: Source Stripe: add checkout_sessions stream, checkout_sessions_line_item stream and promotion_codes stream (#7345) * update: add checkout_sessions and checkout_sessions_line_item streams * update: add discount object to checkout_session_line_items, update json schema and requests_params * fix: checkout_session_line_items json schema * update: add new stream promotion_codes * fix: json schemas * fix: checkout sessions line items json schema * update: add new streams to integration tests * format streams file * fix type in checkout_sessions_line_items json schema * update parse_response in CheckoutSessionsLineItems * bump version + docs * generate seed and format Co-authored-by: Marcos Marx --- .../e094cb9a-26de-4645-8761-65c0c425d1de.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 2 +- .../connectors/source-stripe/Dockerfile | 2 +- 
.../integration_tests/abnormal_state.json | 3 +- .../integration_tests/configured_catalog.json | 33 +++ .../full_refresh_configured_catalog.json | 20 ++ .../non_invoice_line_items_catalog.json | 13 + .../schemas/checkout_sessions.json | 226 ++++++++++++++++++ .../schemas/checkout_sessions_line_items.json | 151 ++++++++++++ .../schemas/promotion_codes.json | 59 +++++ .../source-stripe/source_stripe/source.py | 6 + .../source-stripe/source_stripe/streams.py | 54 ++++- docs/integrations/sources/stripe.md | 4 + 14 files changed, 571 insertions(+), 6 deletions(-) create mode 100644 airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json create mode 100644 airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json create mode 100644 airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json index 712f4c4558c82..327a7d21a5e48 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e094cb9a-26de-4645-8761-65c0c425d1de.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "e094cb9a-26de-4645-8761-65c0c425d1de", "name": "Stripe", "dockerRepository": "airbyte/source-stripe", - "dockerImageTag": "0.1.21", + "dockerImageTag": "0.1.22", "documentationUrl": "https://docs.airbyte.io/integrations/sources/stripe", "icon": "stripe.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 60f3da68e88fb..10634cd08a5ee 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -530,7 +530,7 @@ - name: Stripe sourceDefinitionId: e094cb9a-26de-4645-8761-65c0c425d1de dockerRepository: airbyte/source-stripe - dockerImageTag: 0.1.21 + dockerImageTag: 0.1.22 documentationUrl: https://docs.airbyte.io/integrations/sources/stripe icon: stripe.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 6aa40755838be..905dbcb0617a0 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -5400,7 +5400,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-stripe:0.1.21" +- dockerImage: "airbyte/source-stripe:0.1.22" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-stripe/Dockerfile b/airbyte-integrations/connectors/source-stripe/Dockerfile index b6467d211d351..c9d18d752c733 100644 --- a/airbyte-integrations/connectors/source-stripe/Dockerfile +++ b/airbyte-integrations/connectors/source-stripe/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.21 +LABEL io.airbyte.version=0.1.22 LABEL io.airbyte.name=airbyte/source-stripe diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json index 1703284cdb4de..309cd7f38fe47 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json @@ -13,5 +13,6 @@ "disputes": { "created": 161099630500 }, "products": { "created": 158551134100 }, "refunds": { "created": 161959562900 }, - "payment_intents": { "created": 161959562900 } + "payment_intents": { "created": 161959562900 }, + "promotion_codes": { "created": 163534157100 } } diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json index bbe8a73f9fb2d..5043eebc4b4e6 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json @@ -36,6 +36,26 @@ "destination_sync_mode": "overwrite", "cursor_field": ["created"] }, + { + "stream": { + "name": "checkout_sessions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "checkout_sessions_line_items", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, { "stream": { "name": "coupons", @@ -186,6 +206,19 @@ "destination_sync_mode": "overwrite", "cursor_field": ["created"] }, + { + "stream": { + "name": "promotion_codes", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["created"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["created"] + }, { "stream": { "name": "refunds", diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json index e820bbce21c99..7c13c3b49d540 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/full_refresh_configured_catalog.json @@ -18,6 +18,26 @@ }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "checkout_sessions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "checkout_sessions_line_items", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" } ] } diff --git 
a/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json index 020378c924509..1467d1f2242ed 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/non_invoice_line_items_catalog.json @@ -104,6 +104,19 @@ "destination_sync_mode": "overwrite", "cursor_field": ["created"] }, + { + "stream": { + "name": "promotion_codes", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["created"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["created"] + }, { "stream": { "name": "refunds", diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json new file mode 100644 index 0000000000000..18e90e928a952 --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions.json @@ -0,0 +1,226 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "after_expiration": { + "type": ["null", "object"], + "properties": { + "recovery": { + "type": ["null", "object"], + "properties": { + "allow_promotion_codes": { "type": ["null", "boolean"] }, + "enabled": { "type": ["null", "boolean"] }, + "expires_at": { "type": ["null", "integer"] }, + "url": { "type": ["null", "string"] } + } + } + } + }, + "allow_promotion_codes": { "type": ["null", "boolean"] }, + "amount_subtotal": { "type": ["null", "integer"] }, + "amount_total": { "type": ["null", "integer"] }, + "automatic_tax": { + "type": ["null", "object"], + "properties": { + "enabled": { "type": ["null", "boolean"] }, + "status": { "type": ["null", "string"] } + } + }, + "billing_address_collection": { "type": ["null", "string"] }, + "cancel_url": { "type": ["null", "string"] }, + "client_reference_id": { "type": ["null", "string"] }, + "consent": { + "type": ["null", "object"], + "properties": { + "promotions": { "type": ["null", "string"] } + } + }, + "consent_collection": { + "type": ["null", "object"], + "properties": { + "promotions": { "type": ["null", "string"] } + } + }, + "currency": { "type": ["null", "string"] }, + "customer": { "type": ["null", "string"] }, + "customer_details": { + "type": ["null", "object"], + "properties": { + "email": { "type": ["null", "string"] }, + "phone": { "type": ["null", "string"] }, + "tax_exempt": { "type": ["null", "string"] }, + "tax_ids": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "type": { "type": ["null", "string"] }, + "value": { "type": ["null", "string"] } + } + } + } + } + }, + "customer_email": { "type": ["null", "string"] }, + "expires_at": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "locale": { "type": ["null", "string"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "mode": { "type": ["null", "string"] }, + "payment_intent": { "type": ["null", "string"] }, + "payment_method_options": { + "type": ["null", "object"], + "properties": { + 
"acss_debit": { + "type": ["null", "object"], + "properties": { + "currency": { "type": ["null", "string"] }, + "mandate_options": { + "type": ["null", "object"], + "properties": { + "custom_mandate_url": { "type": ["null", "string"] }, + "default_for": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "interval_description": { "type": ["null", "string"] }, + "payment_schedule": { "type": ["null", "string"] }, + "transaction_type": { "type": ["null", "string"] } + } + }, + "verification_method": { "type": ["null", "string"] } + } + }, + "boleto": { + "type": ["null", "object"], + "properties": { + "expires_after_days": { "type": ["null", "integer"] } + } + }, + "oxxo": { + "type": ["null", "object"], + "properties": { + "expires_after_days": { "type": ["null", "integer"] } + } + } + } + }, + "payment_method_types": { + "type": ["null", "array"], + "items": { + "card": { "type": ["null", "string"] } + } + }, + "payment_status": { "type": ["null", "string"] }, + "phone_number_collection": { + "type": ["null", "object"], + "properties": { + "enabled": { "type": ["null", "boolean"] } + } + }, + "recovered_from": { "type": ["null", "string"] }, + "setup_intent": { "type": ["null", "string"] }, + "shipping": { + "type": ["null", "object"], + "properties": { + "address": { + "type": ["null", "object"], + "properties": { + "city": { "type": ["null", "string"] }, + "country": { "type": ["null", "string"] }, + "line1": { "type": ["null", "string"] }, + "line2": { "type": ["null", "string"] }, + "postal_code": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] } + } + }, + "name": { "type": ["null", "string"] } + } + }, + "shipping_address_collection": { + "type": ["null", "object"], + "properties": { + "allowed_countries": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + } + } + }, + "submit_type": { "type": ["null", "string"] }, + "subscription": { "type": ["null", "string"] }, + "success_url": { "type": ["null", "string"] }, + "tax_id_collection": { + "type": ["null", "object"], + "properties": { + "enabled": { "type": ["null", "boolean"] } + } + }, + "total_details": { + "type": ["null", "object"], + "properties": { + "amount_discount": { "type": ["null", "integer"] }, + "amount_shipping": { "type": ["null", "integer"] }, + "amount_tax": { "type": ["null", "integer"] }, + "breakdown": { + "type": ["null", "object"], + "properties": { + "discounts": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "type": ["null", "integer"] }, + "discount": { + "type": ["null", "object"], + "properties": {} + } + } + } + }, + "taxes": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "type": ["null", "integer"] }, + "rate": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "country": { "type": ["null", "string"] }, + "created": { "type": ["null", "integer"] }, + "description": { "type": ["null", "string"] }, + "display_name": { "type": ["null", "string"] }, + "inclusive": { "type": ["null", "boolean"] }, + "jurisdiction": { "type": ["null", "string"] }, + "livemode": { "type": ["null", "boolean"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "percentage": { "type": ["null", "number"] }, + "state": { "type": ["null", "string"] }, + "tax_type": { "type": ["null", 
"string"] } + } + } + } + } + } + } + } + } + }, + "url": { "type": ["null", "string"] } + } +} diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json new file mode 100644 index 0000000000000..3440bcab5a376 --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json @@ -0,0 +1,151 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "checkout_session_id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "amount_subtotal": { "type": ["null", "integer"] }, + "amount_total": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] }, + "discounts": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "type": ["null", "integer"] }, + "discount": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "coupon": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "amount_off": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "duration": { "type": ["null", "string"] }, + "duration_in_months": { "type": ["null", "integer"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "name": { "type": ["null", "string"] }, + "percent_off": { "type": ["null", "number"] }, + "object": { "type": ["null", "string"] }, + "applies_to": { + "type": ["null", "object"], + "properties": { + "products": { + "type": ["null", "array"], + "items": { "type": ["null", "string"] } + } + } + }, + "created": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "max_redemptions": { "type": ["null", "integer"] }, + "redeem_by": { "type": ["null", "integer"] }, + "times_redeemed": { "type": ["null", "integer"] }, + "valid": { "type": ["null", "boolean"] } + } + }, + "customer": { "type": ["null", "string"] }, + "end": { "type": ["null", "integer"] }, + "start": { "type": ["null", "integer"] }, + "subscription": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "checkout_session": { "type": ["null", "string"] }, + "invoice": { "type": ["null", "string"] }, + "invoice_item": { "type": ["null", "string"] }, + "promotion_code": { "type": ["null", "string"] } + } + } + } + } + }, + "price": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "billing_scheme": { "type": ["null", "string"] }, + "created": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "livemode": { "type": ["null", "boolean"] }, + "lookup_key": { "type": ["null", "string"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "nickname": { "type": ["null", "string"] }, + "product": { "type": ["null", "string"] }, + "recurring": { + "type": ["null", "object"], + "properties": { + "aggregate_usage": { "type": ["null", "string"] }, + "interval": { "type": ["null", "string"] }, + "interval_count": { "type": ["null", "integer"] }, + "usage_type": { "type": ["null", "string"] } + } + }, + "tax_behavior": { "type": ["null", "string"] }, + 
"tiers": { + "type": ["null", "object"], + "properties": { + "flat_amount": { "type": ["null", "integer"] }, + "flat_amount_decimal": { "type": ["null", "string"] }, + "unit_amount": { "type": ["null", "integer"] }, + "unit_amount_decimal": { "type": ["null", "string"] }, + "up_to": { "type": ["null", "integer"] } + } + }, + "tiers_mode": { "type": ["null", "string"] }, + "transform_quantity": { + "type": ["null", "object"], + "properties": { + "divide_by": { "type": ["null", "integer"] }, + "round": { "type": ["null", "string"] } + } + }, + "type": { "type": ["null", "string"] }, + "unit_amount": { "type": ["null", "integer"] }, + "unit_amount_decimal": { "type": ["null", "string"] } + } + }, + "quantity": { "type": ["null", "integer"] }, + "taxes": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "amount": { "types": ["null", "integer"] }, + "rate": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "country": { "type": ["null", "string"] }, + "created": { "type": ["null", "integer"] }, + "description": { "type": ["null", "string"] }, + "display_name": { "type": ["null", "string"] }, + "inclusive": { "type": ["null", "boolean"] }, + "jurisdiction": { "type": ["null", "string"] }, + "livemode": { "type": ["null", "boolean"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "percentage": { "type": ["null", "number"] }, + "state": { "type": ["null", "string"] }, + "tax_type": { "type": ["null", "string"] } + } + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json new file mode 100644 index 0000000000000..030254e5a0ab1 --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/promotion_codes.json @@ -0,0 +1,59 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "code": { "type": ["null", "string"] }, + "coupon": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] }, + "amount_off": { "type": ["null", "integer"] }, + "currency": { "type": ["null", "string"] }, + "duration": { "type": ["null", "string"] }, + "duration_in_months": { "type": ["null", "integer"] }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "name": { "type": ["null", "string"] }, + "percent_off": { "type": ["null", "number"] }, + "object": { "type": ["null", "string"] }, + "applies_to": { + "type": ["null", "object"], + "properties": { + "products": { + "type": ["null", "array"], + "items": { "type": ["null", "string"] } + } + } + }, + "created": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + "max_redemptions": { "type": ["null", "integer"] }, + "redeem_by": { "type": ["null", "integer"] }, + "times_redeemed": { "type": ["null", "integer"] }, + "valid": { "type": ["null", "boolean"] } + } + }, + "metadata": { + "type": ["null", "object"], + "properties": {} + }, + "object": { "type": ["null", "string"] }, + "active": { "type": ["null", "boolean"] }, + "created": { "type": ["null", "integer"] }, + "customer": { "type": ["null", "string"] }, + "expires_at": { "type": ["null", "integer"] }, + "livemode": { "type": ["null", "boolean"] }, + 
"max_redemptions": { "type": ["null", "integer"] }, + "restrictions": { + "type": ["null", "object"], + "properties": { + "first_time_transaction": { "type": ["null", "boolean"] }, + "minimum_amount": { "type": ["null", "integer"] }, + "minimum_amount_currency": { "type": ["null", "string"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py index 3063b3ae52d57..6ca0a21320dba 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py @@ -15,6 +15,8 @@ BalanceTransactions, BankAccounts, Charges, + CheckoutSessions, + CheckoutSessionsLineItems, Coupons, CustomerBalanceTransactions, Customers, @@ -27,6 +29,7 @@ Payouts, Plans, Products, + PromotionCodes, Refunds, SubscriptionItems, Subscriptions, @@ -52,6 +55,8 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: BalanceTransactions(**incremental_args), BankAccounts(**args), Charges(**incremental_args), + CheckoutSessions(**args), + CheckoutSessionsLineItems(**args), Coupons(**incremental_args), CustomerBalanceTransactions(**args), Customers(**incremental_args), @@ -64,6 +69,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: Payouts(**incremental_args), Plans(**incremental_args), Products(**incremental_args), + PromotionCodes(**incremental_args), Refunds(**incremental_args), SubscriptionItems(**args), Subscriptions(**incremental_args), diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py index 472c8533ef5d6..febf99b61a424 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py @@ -2,7 +2,6 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
# - import math from abc import ABC, abstractmethod from typing import Any, Iterable, Mapping, MutableMapping, Optional @@ -348,3 +347,56 @@ def read_records(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwarg customers_stream = Customers(authenticator=self.authenticator, account_id=self.account_id, start_date=self.start_date) for customer in customers_stream.read_records(sync_mode=SyncMode.full_refresh): yield from super().read_records(stream_slice={"customer_id": customer["id"]}, **kwargs) + + +class CheckoutSessions(StripeStream): + """ + API docs: https://stripe.com/docs/api/checkout/sessions/list + """ + + name = "checkout_sessions" + + def path(self, **kwargs): + return "checkout/sessions" + + +class CheckoutSessionsLineItems(StripeStream): + """ + API docs: https://stripe.com/docs/api/checkout/sessions/line_items + """ + + name = "checkout_sessions_line_items" + + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): + return f"checkout/sessions/{stream_slice['checkout_session_id']}/line_items" + + def read_records(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Mapping[str, Any]]: + checkout_session_stream = CheckoutSessions(authenticator=self.authenticator, account_id=self.account_id, start_date=self.start_date) + for checkout_session in checkout_session_stream.read_records(sync_mode=SyncMode.full_refresh): + yield from super().read_records(stream_slice={"checkout_session_id": checkout_session["id"]}, **kwargs) + + def request_params(self, stream_slice: Mapping[str, Any] = None, **kwargs): + params = super().request_params(stream_slice=stream_slice, **kwargs) + params["expand[]"] = "data.discounts" + return params + + def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: + + response_json = response.json() + data = response_json.get("data", []) + if data and stream_slice: + cs_id = stream_slice.get("checkout_session_id", None) + for e in data: + e["checkout_session_id"] = cs_id + yield from data + + +class PromotionCodes(IncrementalStripeStream): + """ + API docs: https://stripe.com/docs/api/promotion_codes/list + """ + + cursor_field = "created" + + def path(self, **kwargs): + return "promotion_codes" diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md index c5411102e1c8e..df253799f28d8 100644 --- a/docs/integrations/sources/stripe.md +++ b/docs/integrations/sources/stripe.md @@ -11,6 +11,8 @@ This Source is capable of syncing the following core Streams: * [Balance Transactions](https://stripe.com/docs/api/balance_transactions/list) \(Incremental\) * [Bank accounts](https://stripe.com/docs/api/customer_bank_accounts/list) * [Charges](https://stripe.com/docs/api/charges/list) \(Incremental\) +* [Checkout Streams](https://stripe.com/docs/api/checkout/sessions/list) +* [Checkout Streams Line Items](https://stripe.com/docs/api/checkout/sessions/line_items) * [Coupons](https://stripe.com/docs/api/coupons/list) \(Incremental\) * [Customer Balance Transactions](https://stripe.com/docs/api/customer_balance_transactions/list) * [Customers](https://stripe.com/docs/api/customers/list) \(Incremental\) @@ -21,6 +23,7 @@ This Source is capable of syncing the following core Streams: * [Invoices](https://stripe.com/docs/api/invoices/list) \(Incremental\) * [PaymentIntents](https://stripe.com/docs/api/payment_intents/list) \(Incremental\) * [Payouts](https://stripe.com/docs/api/payouts/list) \(Incremental\) +* [Promotion 
Code](https://stripe.com/docs/api/promotion_codes/list) \(Incremental\) * [Plans](https://stripe.com/docs/api/plans/list) \(Incremental\) * [Products](https://stripe.com/docs/api/products/list) \(Incremental\) * [Refunds](https://stripe.com/docs/api/refunds/list) \(Incremental\) @@ -71,6 +74,7 @@ If you would like to test Airbyte using test data on Stripe, `sk_test_` and `rk_ | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.22 | 2021-11-05 | [7345](https://github.com/airbytehq/airbyte/pull/7345) | Add 3 new streams | | 0.1.21 | 2021-10-07 | [6841](https://github.com/airbytehq/airbyte/pull/6841) | Fix missing `start_date` argument + update json files for SAT | | 0.1.20 | 2021-09-30 | [6017](https://github.com/airbytehq/airbyte/pull/6017) | Add lookback\_window\_days parameter | | 0.1.19 | 2021-09-27 | [6466](https://github.com/airbytehq/airbyte/pull/6466) | Use `start_date` parameter in incremental streams | From 0b9b8ba4728dcb6e39bc6ee961e6d6ccb26d387c Mon Sep 17 00:00:00 2001 From: Tuan Nguyen Date: Sun, 7 Nov 2021 03:32:13 +0700 Subject: [PATCH 78/83] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20Monday=20(?= =?UTF-8?q?#7168)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add items stream * add boards stream * add remaining streams * check point * lint * fix unit test & linting * Delete launch.json * fix schema * add retry logic * Update airbyte-integrations/connectors/source-monday/source_monday/source.py Co-authored-by: Marcos Marx * small fix + add creds * bump version and config file Co-authored-by: Marcos Marx Co-authored-by: Marcos Marx --- .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + .../80a54ea2-9959-4040-aac1-eee42423ec9b.json | 7 + .../resources/seed/source_definitions.yaml | 6 + .../src/main/resources/seed/source_specs.yaml | 19 +++ .../connectors/source-monday/.dockerignore | 7 + .../connectors/source-monday/Dockerfile | 38 +++++ .../connectors/source-monday/README.md | 132 +++++++++++++++++ .../source-monday/acceptance-test-config.yml | 20 +++ .../source-monday/acceptance-test-docker.sh | 16 ++ .../connectors/source-monday/build.gradle | 14 ++ .../integration_tests/__init__.py | 3 + .../integration_tests/abnormal_state.json | 5 + .../integration_tests/acceptance.py | 16 ++ .../integration_tests/configured_catalog.json | 54 +++++++ .../integration_tests/invalid_config.json | 3 + .../integration_tests/sample_config.json | 3 + .../integration_tests/sample_state.json | 5 + .../connectors/source-monday/main.py | 13 ++ .../connectors/source-monday/requirements.txt | 2 + .../connectors/source-monday/setup.py | 29 ++++ .../source-monday/source_monday/__init__.py | 8 + .../source_monday/schemas/boards.json | 82 +++++++++++ .../source_monday/schemas/items.json | 59 ++++++++ .../source_monday/schemas/teams.json | 15 ++ .../source_monday/schemas/updates.json | 30 ++++ .../source_monday/schemas/users.json | 31 ++++ .../source-monday/source_monday/source.py | 139 ++++++++++++++++++ .../source-monday/source_monday/spec.json | 17 +++ .../source-monday/unit_tests/__init__.py | 3 + .../source-monday/unit_tests/conftest.py | 13 ++ .../source-monday/unit_tests/test_source.py | 21 +++ tools/bin/ci_credentials.sh | 1 + 33 files changed, 813 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json create mode 100644 airbyte-integrations/connectors/source-monday/.dockerignore create mode 100644 
airbyte-integrations/connectors/source-monday/Dockerfile create mode 100644 airbyte-integrations/connectors/source-monday/README.md create mode 100644 airbyte-integrations/connectors/source-monday/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-monday/build.gradle create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json create mode 100644 airbyte-integrations/connectors/source-monday/main.py create mode 100644 airbyte-integrations/connectors/source-monday/requirements.txt create mode 100644 airbyte-integrations/connectors/source-monday/setup.py create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/__init__.py create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/source.py create mode 100644 airbyte-integrations/connectors/source-monday/source_monday/spec.json create mode 100644 airbyte-integrations/connectors/source-monday/unit_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-monday/unit_tests/conftest.py create mode 100644 airbyte-integrations/connectors/source-monday/unit_tests/test_source.py diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index b7b08bdf4d9d9..22684e6e1cef9 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -202,6 +202,7 @@ jobs: SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} SOURCE_FRESHSALES_TEST_CREDS: ${{ secrets.SOURCE_FRESHSALES_TEST_CREDS }} + SOURCE_MONDAY_TEST_CREDS: ${{ secrets.SOURCE_MONDAY_TEST_CREDS }} - run: | echo "$SPEC_CACHE_SERVICE_ACCOUNT_KEY" > spec_cache_key_file.json && docker login -u airbytebot -p ${DOCKER_PASSWORD} ./tools/integrations/manage.sh publish airbyte-integrations/${{ github.event.inputs.connector }} ${{ github.event.inputs.run-tests }} --publish_spec_to_cache diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 1490808c33fe7..a25bd99cce39b 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -197,6 +197,7 @@ jobs: SOURCE_RETENTLY_TEST_CREDS: ${{ secrets.SOURCE_RETENTLY_TEST_CREDS }} SOURCE_SENTRY_TEST_CREDS: ${{ secrets.SOURCE_SENTRY_TEST_CREDS }} 
SOURCE_FRESHSALES_TEST_CREDS: ${{ secrets.SOURCE_FRESHSALES_TEST_CREDS }} + SOURCE_MONDAY_TEST_CREDS: ${{ secrets.SOURCE_MONDAY_TEST_CREDS }} - run: | ./tools/bin/ci_integration_test.sh ${{ github.event.inputs.connector }} name: test ${{ github.event.inputs.connector }} diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json new file mode 100644 index 0000000000000..6f40313e0fb65 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/80a54ea2-9959-4040-aac1-eee42423ec9b.json @@ -0,0 +1,7 @@ +{ + "sourceDefinitionId": "80a54ea2-9959-4040-aac1-eee42423ec9b", + "name": "Monday", + "dockerRepository": "airbyte/source-zendesk-monday", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/monday" +} diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 10634cd08a5ee..183c748961fad 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -338,6 +338,12 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/mixpanel icon: mixpanel.svg sourceType: api +- name: Monday + sourceDefinitionId: 80a54ea2-9959-4040-aac1-eee42423ec9b + dockerRepository: airbyte/source-monday + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/monday + sourceType: api - name: MongoDb sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e dockerRepository: airbyte/source-mongodb-v2 diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 905dbcb0617a0..e526ee27f5c3f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3392,6 +3392,25 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-monday:0.1.0" + spec: + documentationUrl: "https://docsurl.com" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Monday Spec" + type: "object" + required: + - "api_token" + additionalProperties: false + properties: + api_token: + type: "string" + description: "This is the API token to authenticate requests to Monday.\ + \ Profile picture (bottom left) => Admin => API" + airbyte_secret: true + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-mongodb-v2:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2" diff --git a/airbyte-integrations/connectors/source-monday/.dockerignore b/airbyte-integrations/connectors/source-monday/.dockerignore new file mode 100644 index 0000000000000..dd9f9850865ef --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/.dockerignore @@ -0,0 +1,7 @@ +* +!Dockerfile +!Dockerfile.test +!main.py +!source_monday +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-monday/Dockerfile b/airbyte-integrations/connectors/source-monday/Dockerfile new file mode 100644 index 0000000000000..bd7bf4c76adfb --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/Dockerfile @@ -0,0 +1,38 @@ 
+FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_monday ./source_monday + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-monday diff --git a/airbyte-integrations/connectors/source-monday/README.md b/airbyte-integrations/connectors/source-monday/README.md new file mode 100644 index 0000000000000..b876f674886ea --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/README.md @@ -0,0 +1,132 @@ +# Monday Source + +This is the repository for the Monday source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/monday). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-monday:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/monday) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_monday/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. 
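+Since `source_monday/spec.json` only requires the API token, a minimal `secrets/config.json` might look like the sketch below (the token value is a placeholder, not a real credential):
+```
+{
+  "api_token": "<your_monday_api_token>"
+}
+```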
+See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source monday test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-monday:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-monday:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-monday:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-monday:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-monday:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-monday:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-monday:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-monday:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-monday/acceptance-test-config.yml b/airbyte-integrations/connectors/source-monday/acceptance-test-config.yml new file mode 100644 index 0000000000000..18d63f2b9cdf6 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-monday:dev +tests: + spec: + - spec_path: "source_monday/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: ["teams"] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh new file mode 100644 index 0000000000000..e4d8b1cef8961 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-monday/build.gradle b/airbyte-integrations/connectors/source-monday/build.gradle new file mode 100644 index 0000000000000..3a1003739141c --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_monday' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/__init__.py b/airbyte-integrations/connectors/source-monday/integration_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json new file mode 100644 index 0000000000000..52b0f2c2118f4 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py new file mode 100644 index 0000000000000..58c194c5d1376 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json new file mode 100644 index 0000000000000..a44855e5ae74a --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/configured_catalog.json @@ -0,0 +1,54 @@ +{ + "streams": [ + { + "stream": { + "name": "items", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "boards", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "teams", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "updates", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json new file mode 100644 index 0000000000000..801a78c2451c9 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "api_token": "abcd" +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json new file mode 100644 index 0000000000000..e14d519d95824 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "api_token": "12345abc" +} diff --git a/airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json new file mode 100644 index 0000000000000..3587e579822d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-monday/main.py b/airbyte-integrations/connectors/source-monday/main.py new file mode 100644 index 0000000000000..97c4bafc81010 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_monday import SourceMonday + +if __name__ == "__main__": + source = SourceMonday() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-monday/requirements.txt b/airbyte-integrations/connectors/source-monday/requirements.txt new file mode 100644 index 0000000000000..0411042aa0911 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-monday/setup.py b/airbyte-integrations/connectors/source-monday/setup.py new file mode 100644 index 0000000000000..fdb97c1c95179 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_monday", + description="Source implementation for Monday.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-monday/source_monday/__init__.py b/airbyte-integrations/connectors/source-monday/source_monday/__init__.py new file mode 100644 index 0000000000000..b2995ef1eb176 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceMonday + +__all__ = ["SourceMonday"] diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json new file mode 100644 index 0000000000000..196f77d9ef2e7 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/boards.json @@ -0,0 +1,82 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "board_kind": { "type": ["null", "string"] }, + "columns": { + "type": ["null", "array"], + "properties": { + "archived": { "type": ["null", "boolean"] }, + "id": { "type": ["null", "string"] }, + "settings_str": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] }, + "width": { "type": ["null", "integer"] } + } + }, + "communication": { "type": ["null", "object"] }, + "description": { "type": ["null", "string"] }, + "groups": { + "type": ["null", "array"], + "properties": { + "archived": { "type": ["null", "boolean"] }, + "color": { "type": ["null", "string"] }, + "deleted": { "type": ["null", "boolean"] }, + "id": { "type": ["null", "string"] }, + "position": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] } + } + }, + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "owner": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "permissions": { "type": ["null", "string"] }, + "pos": { "type": ["null", "string"] }, + "state": { "type": ["null", "string"] }, + "subscribers": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "tags": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "string"] } + } + }, + "top_group": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] } + } + }, + "updated_at": { "type": ["null", "string"], "format": "date-time" }, + "updates": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "views": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "workspace": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "kind": { "type": ["null", "string"] }, + "description": { "type": ["null", "string"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json new file mode 100644 index 0000000000000..9ce9ea04479f9 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/items.json @@ -0,0 +1,59 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "assets": { + "type": ["array", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "board": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "column_values": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] }, + "value": { "type": ["null", "object"] }, + "additional_info": { "type": ["null", "object"] }, + "text": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "type": { "type": ["null", "string"] } + } + }, + 
"created_at": { "type": ["null", "string"], "format": "date-time" }, + "creator_id": { "type": ["null", "integer"] }, + "group": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "string"] } + } + }, + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "parent_item": { + "type": ["null", "object"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "state": { "type": ["null", "string"] }, + "subscribers": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "updated_at": { "type": ["null", "string"], "format": "date-time" }, + "updates": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json new file mode 100644 index 0000000000000..c409d05b6bbdb --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/teams.json @@ -0,0 +1,15 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "picture_url": { "type": ["null", "string"] }, + "users": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json new file mode 100644 index 0000000000000..11a3164a6fad0 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/updates.json @@ -0,0 +1,30 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "assets": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] } + } + }, + "body": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"], "format": "date-time" }, + "creator_id": { "type": ["null", "integer"] }, + "id": { "type": ["null", "integer"] }, + "item_id": { "type": ["null", "integer"] }, + "replies": { + "type": ["null", "array"], + "properties": { + "id": { "type": ["null", "integer"] }, + "creator_id": { "type": ["null", "integer"] }, + "created_at": { "type": ["null", "string"], "format": "date-time" }, + "text_body": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"], "format": "date-time" }, + "body": { "type": ["null", "string"] } + } + }, + "text_body": { "type": ["null", "string"] }, + "updated_at": { "type": ["null", "string"], "format": "date-time" } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json b/airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json new file mode 100644 index 0000000000000..a064bdc3f4bca --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/schemas/users.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "birthday": { "type": ["null", "string"], "format": "date-time" }, + "country_code": { "type": ["null", "string"] }, + "created_at": { "type": ["null", "string"], "format": "date-time" }, + "join_date": { "type": ["null", "string"], "format": "date" }, + "email": { "type": ["null", "string"] }, + "enabled": { "type": ["null", 
"boolean"] }, + "id": { "type": ["null", "integer"] }, + "is_admin": { "type": ["null", "boolean"] }, + "is_guest": { "type": ["null", "boolean"] }, + "is_pending": { "type": ["null", "boolean"] }, + "is_view_only": { "type": ["null", "boolean"] }, + "is_verified": { "type": ["null", "boolean"] }, + "location": { "type": ["null", "string"] }, + "mobile_phone": { "type": ["null", "string"] }, + "name": { "type": ["null", "string"] }, + "phone": { "type": ["null", "string"] }, + "photo_original": { "type": ["null", "string"] }, + "photo_small": { "type": ["null", "string"] }, + "photo_thumb": { "type": ["null", "string"] }, + "photo_thumb_small": { "type": ["null", "string"] }, + "photo_tiny": { "type": ["null", "string"] }, + "time_zone_identifier": { "type": ["null", "string"] }, + "title": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] }, + "utc_hours_diff": { "type": ["null", "integer"] } + } +} diff --git a/airbyte-integrations/connectors/source-monday/source_monday/source.py b/airbyte-integrations/connectors/source-monday/source_monday/source.py new file mode 100644 index 0000000000000..2e0cab5d0815f --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/source.py @@ -0,0 +1,139 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import json +import os +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple + +import requests +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer + + +# Basic full refresh stream +class MondayStream(HttpStream, ABC): + url_base = "https://api.monday.com/v2" + primary_key = "id" + page = 1 + transformer: TypeTransformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + json_response = response.json().get("data", {}) + records = json_response.get(self.name.lower(), []) + self.page += 1 + if records: + return {"page": self.page} + + def load_schema(self): + """ + Load schema from file and make a GraphQL query + """ + script_dir = os.path.dirname(__file__) + schema_path = os.path.join(script_dir, f"schemas/{self.name.lower()}.json") + with open(schema_path) as f: + schema_dict = json.load(f) + schema = schema_dict["properties"] + graphql_schema = [] + for col in schema: + if "properties" in schema[col]: + nested_ids = ",".join(schema[col]["properties"]) + graphql_schema.append(f"{col}{{{nested_ids}}}") + else: + graphql_schema.append(col) + return ",".join(graphql_schema) + + def should_retry(self, response: requests.Response) -> bool: + # Monday API return code 200 with and errors key if complexity is too high. 
+ # https://api.developer.monday.com/docs/complexity-queries + is_complex_query = response.json().get("errors") + return response.status_code == 429 or 500 <= response.status_code < 600 or is_complex_query + + @property + def retry_factor(self) -> int: + return 15 + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + graphql_params = {} + if next_page_token: + graphql_params.update(next_page_token) + + graphql_query = ",".join([f"{k}:{v}" for k, v in graphql_params.items()]) + + # Monday uses a query string to pass in environments + params = {"query": f"query {{ {self.name.lower()} ({graphql_query}) {{ {self.load_schema()} }} }}"} + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + json_response = response.json().get("data", {}) + records = json_response.get(self.name.lower(), []) + yield from records + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "" + + +class Items(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/items-queries + """ + + +class Boards(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/groups-queries#groups-queries + """ + + +class Teams(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/teams-queries + """ + + +class Updates(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/updates-queries + """ + + +class Users(MondayStream): + """ + API Documentation: https://api.developer.monday.com/docs/users-queries-1 + """ + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + pass + + +# Source +class SourceMonday(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + url = "https://api.monday.com/v2" + params = {"query": "{boards(limit:1){id name}}"} + auth = TokenAuthenticator(config["api_token"]).get_auth_header() + try: + response = requests.post(url, params=params, headers=auth) + response.raise_for_status() + return True, None + except requests.exceptions.RequestException as e: + return False, e + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = TokenAuthenticator(token=config["api_token"]) + return [ + Items(authenticator=auth), + Boards(authenticator=auth), + Teams(authenticator=auth), + Updates(authenticator=auth), + Users(authenticator=auth), + ] diff --git a/airbyte-integrations/connectors/source-monday/source_monday/spec.json b/airbyte-integrations/connectors/source-monday/source_monday/spec.json new file mode 100644 index 0000000000000..870dd5ac587d4 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/source_monday/spec.json @@ -0,0 +1,17 @@ +{ + "documentationUrl": "https://docsurl.com", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Monday Spec", + "type": "object", + "required": ["api_token"], + "additionalProperties": false, + "properties": { + "api_token": { + "type": "string", + "description": "This is the API token to authenticate requests to Monday. 
Profile picture (bottom left) => Admin => API", + "airbyte_secret": true + } + } + } +} diff --git a/airbyte-integrations/connectors/source-monday/unit_tests/__init__.py b/airbyte-integrations/connectors/source-monday/unit_tests/__init__.py new file mode 100644 index 0000000000000..46b7376756ec6 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/unit_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-monday/unit_tests/conftest.py b/airbyte-integrations/connectors/source-monday/unit_tests/conftest.py new file mode 100644 index 0000000000000..d03c2820311d0 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/unit_tests/conftest.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import json + +import pytest + + +@pytest.fixture(scope="session", name="config") +def config_fixture(): + with open("secrets/config.json", "r") as config_file: + return json.load(config_file) diff --git a/airbyte-integrations/connectors/source-monday/unit_tests/test_source.py b/airbyte-integrations/connectors/source-monday/unit_tests/test_source.py new file mode 100644 index 0000000000000..ed8c514aacbf1 --- /dev/null +++ b/airbyte-integrations/connectors/source-monday/unit_tests/test_source.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +from source_monday.source import SourceMonday + + +def test_check_connection(mocker, config): + source = SourceMonday() + logger_mock = MagicMock() + assert source.check_connection(logger_mock, config) == (True, None) + + +def test_stream_count(mocker): + source = SourceMonday() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 5 + assert len(streams) == expected_streams_number diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 6936d2e3a54a3..cd1742f8f25e5 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -111,6 +111,7 @@ write_standard_creds source-mailchimp "$MAILCHIMP_TEST_CREDS" write_standard_creds source-marketo "$SOURCE_MARKETO_TEST_CREDS" write_standard_creds source-microsoft-teams "$MICROSOFT_TEAMS_TEST_CREDS" write_standard_creds source-mixpanel "$MIXPANEL_INTEGRATION_TEST_CREDS" +write_standard_creds source-monday "$SOURCE_MONDAY_TEST_CREDS" write_standard_creds source-mongodb-strict-encrypt "$MONGODB_TEST_CREDS" "credentials.json" write_standard_creds source-mongodb-v2 "$MONGODB_TEST_CREDS" "credentials.json" write_standard_creds source-mssql "$MSSQL_RDS_TEST_CREDS" From 4a5751ae7d7b422fa59123caa83e75c1587d3bda Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sun, 7 Nov 2021 23:00:23 -0800 Subject: [PATCH 79/83] m1 build fixes (#7720) --- .../airbyte/commons/util/AutoCloseableIteratorsTest.java | 9 ++++++--- docker-compose.build-m1.yaml | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java b/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java index 26f160c06a1f6..145e6565454e6 100644 --- a/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java +++ b/airbyte-commons/src/test/java/io/airbyte/commons/util/AutoCloseableIteratorsTest.java @@ -9,7 +9,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static 
org.mockito.Mockito.never; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -18,6 +17,7 @@ import io.airbyte.commons.concurrency.VoidCallable; import java.util.Iterator; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Stream; import org.junit.jupiter.api.Test; @@ -38,7 +38,10 @@ void testFromIterator() throws Exception { @Test void testFromStream() throws Exception { - final Stream stream = spy(Stream.of("a", "b", "c")); + final AtomicBoolean isClosed = new AtomicBoolean(false); + final Stream stream = Stream.of("a", "b", "c"); + stream.onClose(() -> isClosed.set(true)); + final AutoCloseableIterator iterator = AutoCloseableIterators.fromStream(stream); assertNext(iterator, "a"); @@ -46,7 +49,7 @@ void testFromStream() throws Exception { assertNext(iterator, "c"); iterator.close(); - verify(stream).close(); + assertTrue(isClosed.get()); } private void assertNext(final Iterator iterator, final String value) { diff --git a/docker-compose.build-m1.yaml b/docker-compose.build-m1.yaml index 71e8dc4f6d5e1..2dc62df2c91cb 100644 --- a/docker-compose.build-m1.yaml +++ b/docker-compose.build-m1.yaml @@ -41,7 +41,7 @@ services: build: dockerfile: Dockerfile args: - ARCH: ${DOCKER_BUILD_ARCH} + DOCKER_BUILD_ARCH: ${DOCKER_BUILD_ARCH} JDK_VERSION: ${JDK_VERSION} context: airbyte-workers labels: From 154ecceda0350b33b840aab076b6a57b9aad358b Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Sun, 7 Nov 2021 23:01:08 -0800 Subject: [PATCH 80/83] improve link checker (#6797) * improve link checker * use ref instead of base_ref * remove base, always compare to master for modified * add failing to test * don't do quiet for testing * switch error to 404 not 403 * yes to both * turn off verbose mode * fix * actually check things * fix outstanding link problems * revet change to run for everything * use new format * ignore gitbook failures * switch back to only running on master --- .github/workflows/doc-link-check.json | 6 +++++- .github/workflows/doc-link-check.yml | 16 ++++++++++++---- docs/deploying-airbyte/on-kubernetes.md | 4 ++-- docs/integrations/sources/amplitude.md | 2 +- docs/integrations/sources/iterable.md | 2 +- docs/integrations/sources/klaviyo.md | 1 + docs/integrations/sources/mixpanel.md | 3 ++- docs/integrations/sources/trello.md | 2 +- 8 files changed, 25 insertions(+), 11 deletions(-) diff --git a/.github/workflows/doc-link-check.json b/.github/workflows/doc-link-check.json index 6777f5033b01c..a7683d73d66bd 100644 --- a/.github/workflows/doc-link-check.json +++ b/.github/workflows/doc-link-check.json @@ -45,11 +45,15 @@ "reason": "Test only scaffold connector", "pattern": "destinations/scaffold-" }, + { + "reason": "Returns a 403 for many valid pages", + "pattern": "https://mvnrepository.com/artifact/" + }, { "reason": "Archived articles aren't actively maintained.", "pattern": "archive/" } ], "retryOn429": false, - "aliveStatusCodes": [200, 206, 401, 403, 429, 503] + "aliveStatusCodes": [200, 206, 429, 503, 0] } diff --git a/.github/workflows/doc-link-check.yml b/.github/workflows/doc-link-check.yml index 9ccfc86d846bf..1a52e49cd9b07 100644 --- a/.github/workflows/doc-link-check.yml +++ b/.github/workflows/doc-link-check.yml @@ -1,5 +1,6 @@ # Perform link check on all markdown files -name: Doc Link Checker (Full) + +name: Doc Link Checker on: push: @@ -12,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@master + # check all files on master 
- uses: gaurav-nelson/github-action-markdown-link-check@v1 + if: github.ref == 'refs/heads/master' with: - use-quiet-mode: 'no' - use-verbose-mode: 'yes' + use-quiet-mode: 'yes' check-modified-files-only: 'no' config-file: .github/workflows/doc-link-check.json - base-branch: ${{ github.base_ref }} +# # check changed files for branches +# - uses: gaurav-nelson/github-action-markdown-link-check@v1 +# if: github.ref != 'refs/heads/master' +# with: +# use-quiet-mode: 'yes' +# check-modified-files-only: 'yes' +# config-file: .github/workflows/doc-link-check.json diff --git a/docs/deploying-airbyte/on-kubernetes.md b/docs/deploying-airbyte/on-kubernetes.md index eece6c1f7ad41..48b3ee793d262 100644 --- a/docs/deploying-airbyte/on-kubernetes.md +++ b/docs/deploying-airbyte/on-kubernetes.md @@ -10,8 +10,8 @@ Airbyte allows scaling sync workloads horizontally using Kubernetes. The core co For local testing we recommend following one of the following setup guides: -* [Docker Desktop \(Mac\)](https://docs.docker.com/desktop/kubernetes/) -* [Minikube](https://minikube.sigs.k8s.io/docs/start/) +* [Docker Desktop \(Mac\)](https://docs.docker.com/desktop/kubernetes) +* [Minikube](https://minikube.sigs.k8s.io/docs/start) * NOTE: Start Minikube with at least 4gb RAM with `minikube start --memory=4000` * [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/) diff --git a/docs/integrations/sources/amplitude.md b/docs/integrations/sources/amplitude.md index 8ec31cf76db2f..ab694e5c6aadf 100644 --- a/docs/integrations/sources/amplitude.md +++ b/docs/integrations/sources/amplitude.md @@ -38,7 +38,7 @@ The Amplitude connector should gracefully handle Amplitude API limitations under * Amplitude Secret Key ### Setup guide - + Please read [How to get your API key and Secret key](https://help.amplitude.com/hc/en-us/articles/360058073772-Create-and-manage-organizations-and-projects#view-and-edit-your-project-information). ## Changelog diff --git a/docs/integrations/sources/iterable.md b/docs/integrations/sources/iterable.md index db20b0671f304..9b9830a8af0b9 100644 --- a/docs/integrations/sources/iterable.md +++ b/docs/integrations/sources/iterable.md @@ -51,7 +51,7 @@ The Iterable connector should not run into Iterable API limitations under normal * Iterable API Key ### Setup guide - + Please read [How to find your API key](https://support.iterable.com/hc/en-us/articles/360043464871-API-Keys-#creating-api-keys). ## CHANGELOG diff --git a/docs/integrations/sources/klaviyo.md b/docs/integrations/sources/klaviyo.md index f2a293e714ae1..04dffda082f58 100644 --- a/docs/integrations/sources/klaviyo.md +++ b/docs/integrations/sources/klaviyo.md @@ -44,6 +44,7 @@ The Klaviyo connector should not run into Klaviyo API limitations under normal u ### Setup guide + Please follow these [steps](https://help.klaviyo.com/hc/en-us/articles/115005062267-How-to-Manage-Your-Account-s-API-Keys#your-private-api-keys3) to obtain Private API Key for your account. ## CHANGELOG diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index e8b1ce633efe1..4249393b686d6 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -47,9 +47,10 @@ The Mixpanel connector should not run into Mixpanel API limitations under normal * Project region `US` or `EU` ### Setup guide - + Please read [Find API Secret](https://help.mixpanel.com/hc/en-us/articles/115004502806-Find-Project-Token-). + Select the correct region \(EU or US\) for your Mixpanel project. 
See detail [here](https://help.mixpanel.com/hc/en-us/articles/360039135652-Data-Residency-in-EU) ## CHANGELOG diff --git a/docs/integrations/sources/trello.md b/docs/integrations/sources/trello.md index 1be02dc448440..cda4717fd1c19 100644 --- a/docs/integrations/sources/trello.md +++ b/docs/integrations/sources/trello.md @@ -42,7 +42,7 @@ The Trello connector should not run into Trello API limitations under normal usa * Trello API Key ### Setup guide - + Please read [How to get your APIs Token and Key](https://developer.atlassian.com/cloud/trello/guides/rest-api/authorization/#using-basic-oauth) or you can log in to Trello and visit [Developer API Keys](https://trello.com/app-key/). ## Changelog From af903f23817070328db00403dde641c80eb684eb Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Mon, 8 Nov 2021 10:23:51 +0200 Subject: [PATCH 81/83] =?UTF-8?q?=F0=9F=8E=89=20Source=20Recharge:=20impro?= =?UTF-8?q?ve=20'backoff'=20for=20HTTP=20requests=20(#7626)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * backoff for successful but incomplete responses Signed-off-by: Sergey Chvalyuk --- .../45d2e135-2ede-49e1-939f-3e3ec357a65e.json | 2 +- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- .../connectors/source-recharge/Dockerfile | 2 +- .../connectors/source-recharge/source_recharge/api.py | 9 +++++++++ docs/integrations/sources/recharge.md | 1 + 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json index 92988179fb2e4..eb89c73307bf8 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/45d2e135-2ede-49e1-939f-3e3ec357a65e.json @@ -2,6 +2,6 @@ "sourceDefinitionId": "45d2e135-2ede-49e1-939f-3e3ec357a65e", "name": "Recharge", "dockerRepository": "airbyte/source-recharge", - "dockerImageTag": "0.1.3", + "dockerImageTag": "0.1.4", "documentationUrl": "https://docs.airbyte.io/integrations/sources/recharge" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 183c748961fad..ac93faba15118 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -436,7 +436,7 @@ - name: Recharge sourceDefinitionId: 45d2e135-2ede-49e1-939f-3e3ec357a65e dockerRepository: airbyte/source-recharge - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 documentationUrl: https://docs.airbyte.io/integrations/sources/recharge sourceType: api - name: Recurly diff --git a/airbyte-integrations/connectors/source-recharge/Dockerfile b/airbyte-integrations/connectors/source-recharge/Dockerfile index 4292222431b21..b91a5d04fcafb 100644 --- a/airbyte-integrations/connectors/source-recharge/Dockerfile +++ b/airbyte-integrations/connectors/source-recharge/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.3 +LABEL io.airbyte.version=0.1.4 LABEL io.airbyte.name=airbyte/source-recharge diff --git a/airbyte-integrations/connectors/source-recharge/source_recharge/api.py b/airbyte-integrations/connectors/source-recharge/source_recharge/api.py index a7f7bcbfc4934..30e922385db03 100644 --- a/airbyte-integrations/connectors/source-recharge/source_recharge/api.py +++ b/airbyte-integrations/connectors/source-recharge/source_recharge/api.py @@ -56,6 +56,15 @@ def get_stream_data(self, response_data: Any) -> List[dict]: else: return [response_data] + def should_retry(self, response: requests.Response) -> bool: + res = super().should_retry(response) + if res: + return res + + # For some reason, successful responses contains incomplete data + content_length = int(response.headers.get("Content-Length", 0)) + return response.status_code == 200 and content_length > len(response.content) + class IncrementalRechargeStream(RechargeStream, ABC): diff --git a/docs/integrations/sources/recharge.md b/docs/integrations/sources/recharge.md index f2eeafb1b8448..0445abcbdae62 100644 --- a/docs/integrations/sources/recharge.md +++ b/docs/integrations/sources/recharge.md @@ -50,6 +50,7 @@ Please read [How to generate your API token](https://support.rechargepayments.co | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.4 | 2021-11-05 | [7626](https://github.com/airbytehq/airbyte/pull/7626) | Improve 'backoff' for HTTP requests | | 0.1.3 | 2021-09-17 | [6149](https://github.com/airbytehq/airbyte/pull/6149) | Update `discount` and `order` schema | | 0.1.2 | 2021-09-17 | [6149](https://github.com/airbytehq/airbyte/pull/6149) | Change `cursor_field` for Incremental streams | | | | | | From f0c54a7c0a63d2943ced2008b4796e5d2af13d14 Mon Sep 17 00:00:00 2001 From: Vadym Date: Mon, 8 Nov 2021 10:32:39 +0200 Subject: [PATCH 82/83] =?UTF-8?q?=F0=9F=8E=89=20Source=20Greenhouse:=20Imp?= =?UTF-8?q?lement=20demographics=20streams=20support=20(#7607)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add demographics streams support. Update SAT to dest demographics streams. 
* Bump docker version --- .../59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json | 2 +- .../resources/seed/source_definitions.yaml | 2 +- .../connectors/source-greenhouse/Dockerfile | 2 +- .../acceptance-test-config.yml | 3 +- .../configured_catalog_const_records.json | 63 +++++++ .../configured_catalog_no_demographics.json | 177 ------------------ .../source_greenhouse/source.py | 14 ++ .../source_greenhouse/streams.py | 111 ++++++----- docs/integrations/sources/greenhouse.md | 1 + 9 files changed, 137 insertions(+), 238 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json index d5d140972c959..7999f8cfe9eed 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/59f1e50a-331f-4f09-b3e8-2e8d4d355f44.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "59f1e50a-331f-4f09-b3e8-2e8d4d355f44", "name": "Greenhouse", "dockerRepository": "airbyte/source-greenhouse", - "dockerImageTag": "0.2.5", + "dockerImageTag": "0.2.6", "documentationUrl": "https://docs.airbyte.io/integrations/sources/greenhouse", "icon": "greenhouse.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index ac93faba15118..a842e98bb00ac 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -223,7 +223,7 @@ - name: Greenhouse sourceDefinitionId: 59f1e50a-331f-4f09-b3e8-2e8d4d355f44 dockerRepository: airbyte/source-greenhouse - dockerImageTag: 0.2.5 + dockerImageTag: 0.2.6 documentationUrl: https://docs.airbyte.io/integrations/sources/greenhouse icon: greenhouse.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-greenhouse/Dockerfile b/airbyte-integrations/connectors/source-greenhouse/Dockerfile index 7a8111abed8c5..267a7cdc85dc6 100644 --- a/airbyte-integrations/connectors/source-greenhouse/Dockerfile +++ b/airbyte-integrations/connectors/source-greenhouse/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.5 +LABEL io.airbyte.version=0.2.6 LABEL io.airbyte.name=airbyte/source-greenhouse diff --git a/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml b/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml index e590b104696cf..3c111edb3767e 100644 --- a/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-greenhouse/acceptance-test-config.yml @@ -16,8 +16,7 @@ tests: - config_path: "secrets/config_users_only.json" basic_read: - config_path: "secrets/config.json" - # TODO: replace with configured_catalog.json when https://github.com/airbytehq/airbyte/issues/6546 is resolved - configured_catalog_path: "integration_tests/configured_catalog_no_demographics.json" + configured_catalog_path: "integration_tests/configured_catalog.json" - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog_users_only.json" full_refresh: diff --git a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json b/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json index d73400945f18e..ab9dfc20ec80e 100644 --- a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json +++ b/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_const_records.json @@ -152,6 +152,69 @@ }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_question_sets", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_questions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_answer_options", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_answers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "applications_demographics_answers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_question_sets_questions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "demographics_answers_answer_options", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" } ] } diff --git a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json b/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json deleted file mode 100644 index ee4f6c3b296ea..0000000000000 --- 
a/airbyte-integrations/connectors/source-greenhouse/integration_tests/configured_catalog_no_demographics.json +++ /dev/null @@ -1,177 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "applications", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "candidates", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "close_reasons", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "degrees", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "departments", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "job_posts", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "jobs", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "offers", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "scorecards", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "users", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "custom_fields", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "interviews", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "applications_interviews", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "sources", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "rejection_reasons", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "jobs_openings", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": 
"job_stages", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "jobs_stages", - "json_schema": {}, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py index 7496bc0a5db11..9f54797433930 100644 --- a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py +++ b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/source.py @@ -11,11 +11,18 @@ from requests.auth import HTTPBasicAuth from source_greenhouse.streams import ( Applications, + ApplicationsDemographicsAnswers, ApplicationsInterviews, Candidates, CloseReasons, CustomFields, Degrees, + DemographicsAnswerOptions, + DemographicsAnswers, + DemographicsAnswersAnswerOptions, + DemographicsQuestions, + DemographicsQuestionSets, + DemographicsQuestionSetsQuestions, Departments, Interviews, JobPosts, @@ -62,6 +69,13 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: Scorecards(authenticator=auth), Sources(authenticator=auth), Users(authenticator=auth), + ApplicationsDemographicsAnswers(authenticator=auth), + DemographicsAnswers(authenticator=auth), + DemographicsAnswerOptions(authenticator=auth), + DemographicsQuestions(authenticator=auth), + DemographicsAnswersAnswerOptions(authenticator=auth), + DemographicsQuestionSets(authenticator=auth), + DemographicsQuestionSetsQuestions(authenticator=auth), ] return streams diff --git a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py index 49a3a9dff42a9..46f5112eea497 100644 --- a/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py +++ b/airbyte-integrations/connectors/source-greenhouse/source_greenhouse/streams.py @@ -73,13 +73,13 @@ class Applications(GreenhouseStream): """ -class ApplicationsDemographicsAnswers(GreenhouseStream): +class ApplicationsDemographicsAnswers(GreenhouseSubStream, GreenhouseStream): """ Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answers """ - def path(self, **kwargs) -> str: - return "demographics/answers" + parent_stream = Applications + path_template = "applications/{parent_id}/demographics/answers" class ApplicationsInterviews(GreenhouseSubStream, GreenhouseStream): @@ -115,59 +115,58 @@ class Degrees(GreenhouseStream): """ -# TODO: uncomment when https://github.com/airbytehq/airbyte/issues/6546 is resolved -# class DemographicsAnswers(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answers -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/answers" -# -# -# class DemographicsAnswerOptions(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/answer_options" -# -# -# class DemographicsQuestions(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/questions" -# -# -# class DemographicsAnswersAnswerOptions(GreenhouseSubStream, GreenhouseStream): -# """ -# Docs: 
https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options-for-demographic-question -# """ -# -# parent_stream = DemographicsQuestions -# path_template = "demographics/questions/{parent_id}/answer_options" -# -# -# class DemographicsQuestionSets(GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-question-sets -# """ -# -# def path(self, **kwargs) -> str: -# return "demographics/question_sets" -# -# -# class DemographicsQuestionSetsQuestions(GreenhouseSubStream, GreenhouseStream): -# """ -# Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions-for-demographic-question-set -# """ -# -# parent_stream = DemographicsQuestionSets -# path_template = "demographics/question_sets/{parent_id}/questions" +class DemographicsAnswers(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answers + """ + + def path(self, **kwargs) -> str: + return "demographics/answers" + + +class DemographicsAnswerOptions(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options + """ + + def path(self, **kwargs) -> str: + return "demographics/answer_options" + + +class DemographicsQuestions(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions + """ + + def path(self, **kwargs) -> str: + return "demographics/questions" + + +class DemographicsAnswersAnswerOptions(GreenhouseSubStream, GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-answer-options-for-demographic-question + """ + + parent_stream = DemographicsQuestions + path_template = "demographics/questions/{parent_id}/answer_options" + + +class DemographicsQuestionSets(GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-question-sets + """ + + def path(self, **kwargs) -> str: + return "demographics/question_sets" + + +class DemographicsQuestionSetsQuestions(GreenhouseSubStream, GreenhouseStream): + """ + Docs: https://developers.greenhouse.io/harvest.html#get-list-demographic-questions-for-demographic-question-set + """ + + parent_stream = DemographicsQuestionSets + path_template = "demographics/question_sets/{parent_id}/questions" class Departments(GreenhouseStream): diff --git a/docs/integrations/sources/greenhouse.md b/docs/integrations/sources/greenhouse.md index a8b57e1459c70..cdd9dd4e7a826 100644 --- a/docs/integrations/sources/greenhouse.md +++ b/docs/integrations/sources/greenhouse.md @@ -57,5 +57,6 @@ Please follow the [Greenhouse documentation for generating an API key](https://d | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.6 | 2021-11-08 | [7607](https://github.com/airbytehq/airbyte/pull/7607) | Implement demographics streams support. Update SAT for demographics streams | | 0.2.5 | 2021-09-22 | [6377](https://github.com/airbytehq/airbyte/pull/6377) | Refactor the connector to use CDK. Implement additional stream support | | 0.2.4 | 2021-09-15 | [6238](https://github.com/airbytehq/airbyte/pull/6238) | added identification of accessible streams for API keys with limited permissions | From c918f52f16fc02d06cf6e5dc6d44b0af0b58cca0 Mon Sep 17 00:00:00 2001 From: Dmytro Date: Mon, 8 Nov 2021 14:22:20 +0200 Subject: [PATCH 83/83] Fix Hubspot and Asana oAuth flow. 
(#7730) --- .../36c891d9-4bd9-43ac-bad2-10e12756272c.json | 2 +- .../main/resources/seed/source_definitions.yaml | 2 +- .../connectors/source-hubspot/Dockerfile | 2 +- .../source-hubspot/source_hubspot/spec.json | 8 +------- .../io/airbyte/oauth/flows/AsanaOAuthFlow.java | 16 ++++++++++++++++ .../io/airbyte/oauth/flows/HubspotOAuthFlow.java | 16 ++++++++++++++++ .../airbyte/oauth/flows/AsanaOAuthFlowTest.java | 4 ++-- .../oauth/flows/HubspotOAuthFlowTest.java | 4 ++-- docs/integrations/sources/hubspot.md | 1 + 9 files changed, 41 insertions(+), 14 deletions(-) diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index cda2735c33c8f..3af6a81ec4d29 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "Hubspot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.22", + "dockerImageTag": "0.1.23", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index a842e98bb00ac..4597a82ddecbc 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -236,7 +236,7 @@ - name: Hubspot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.22 + dockerImageTag: 0.1.23 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 40fcc091a1211..5acc4f5ea5285 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.22 +LABEL io.airbyte.version=0.1.23 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json index fbb98942a99c5..8a969205962b9 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json @@ -23,11 +23,9 @@ "type": "object", "title": "Authenticate via Hubspot (Oauth)", "required": [ - "redirect_uri", "client_id", "client_secret", "refresh_token", - "access_token", "credentials_title" ], "properties": { @@ -92,11 +90,7 @@ "auth_type": "oauth2.0", "oauth2Specification": { "rootObject": ["credentials", "0"], - "oauthFlowInitParameters": [ - ["client_id"], - ["client_secret"], - ["refresh_token"] - ], + "oauthFlowInitParameters": [["client_id"], ["client_secret"]], "oauthFlowOutputParameters": [["refresh_token"]] } } diff --git 
a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java index 19ee047bc6ea2..c6a2ec9273c6d 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/AsanaOAuthFlow.java @@ -4,7 +4,9 @@ package io.airbyte.oauth.flows; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.oauth.BaseOAuthFlow; @@ -60,4 +62,18 @@ protected Map getAccessTokenQueryParameters(String clientId, Str .build(); } + @Override + protected String getClientIdUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientIdUnsafe(config.get("credentials")); + } + + @Override + protected String getClientSecretUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientSecretUnsafe(config.get("credentials")); + } + } diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java index e1edcfbf6efab..45139504d2816 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/HubspotOAuthFlow.java @@ -4,6 +4,8 @@ package io.airbyte.oauth.flows; +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.oauth.BaseOAuthFlow; @@ -90,4 +92,18 @@ protected String getAccessTokenUrl() { return "https://api.hubapi.com/oauth/v1/token"; } + @Override + protected String getClientIdUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientIdUnsafe(config.get("credentials")); + } + + @Override + protected String getClientSecretUnsafe(final JsonNode config) { + // the config object containing client ID and secret is nested inside the "credentials" object + Preconditions.checkArgument(config.hasNonNull("credentials")); + return super.getClientSecretUnsafe(config.get("credentials")); + } + } diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java index 4119254b80e9a..13071ab096751 100644 --- a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/AsanaOAuthFlowTest.java @@ -48,10 +48,10 @@ public void setup() throws IOException, JsonValidationException { .withOauthParameterId(UUID.randomUUID()) .withSourceDefinitionId(definitionId) .withWorkspaceId(workspaceId) - .withConfiguration(Jsons.jsonNode(ImmutableMap.builder() + .withConfiguration(Jsons.jsonNode(Map.of("credentials", ImmutableMap.builder() .put("client_id", "test_client_id") .put("client_secret", "test_client_secret") - 
.build())))); + .build()))))); asanaoAuthFlow = new AsanaOAuthFlow(configRepository, httpClient, AsanaOAuthFlowTest::getConstantState); } diff --git a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java index e18f83864e26b..46c5272898e51 100644 --- a/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java +++ b/airbyte-oauth/src/test/java/io/airbyte/oauth/flows/HubspotOAuthFlowTest.java @@ -48,10 +48,10 @@ public void setup() throws IOException, JsonValidationException { .withOauthParameterId(UUID.randomUUID()) .withSourceDefinitionId(definitionId) .withWorkspaceId(workspaceId) - .withConfiguration(Jsons.jsonNode(ImmutableMap.builder() + .withConfiguration(Jsons.jsonNode(Map.of("credentials", ImmutableMap.builder() .put("client_id", "test_client_id") .put("client_secret", "test_client_secret") - .build())))); + .build()))))); flow = new HubspotOAuthFlow(configRepository, httpClient, HubspotOAuthFlowTest::getConstantState); } diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 35b64b637c8b0..f7caa93fa2e57 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -96,6 +96,7 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix oAuth flow schema| | 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | | 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | | 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length |
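
Note on the OAuth fix in the patch above: the `getClientIdUnsafe`/`getClientSecretUnsafe` overrides added to `AsanaOAuthFlow` and `HubspotOAuthFlow` expect the stored OAuth parameter configuration to nest `client_id`/`client_secret` under a `credentials` object — the same shape the updated test fixtures build with `Map.of("credentials", ...)`. The following is a minimal, self-contained sketch of that unwrapping using plain Jackson; it is not the Airbyte `BaseOAuthFlow` code, and the class and helper names are illustrative only.

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class NestedCredentialsSketch {

  // Mirrors the intent of the getClientIdUnsafe override above: the OAuth parameter
  // config nests client_id/client_secret one level down, under a "credentials" object.
  static String clientIdFrom(final JsonNode config) {
    final JsonNode credentials = config.get("credentials");
    if (credentials == null || credentials.isNull()) {
      throw new IllegalArgumentException("config is missing the 'credentials' object");
    }
    final JsonNode clientId = credentials.get("client_id");
    if (clientId == null || clientId.isNull()) {
      throw new IllegalArgumentException("'credentials' is missing 'client_id'");
    }
    return clientId.asText();
  }

  public static void main(final String[] args) throws Exception {
    // Same shape as the updated AsanaOAuthFlowTest / HubspotOAuthFlowTest fixtures.
    final JsonNode config = new ObjectMapper().readTree(
        "{\"credentials\":{\"client_id\":\"test_client_id\",\"client_secret\":\"test_client_secret\"}}");
    System.out.println(clientIdFrom(config)); // -> test_client_id
  }
}
```

Checking for the nested object up front (the patch does this with `Preconditions.checkArgument(config.hasNonNull("credentials"))`) surfaces a misconfigured OAuth parameter immediately, rather than failing later with a null client ID or secret.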