diff --git a/.github/labeler.yml b/.github/labeler.yml index 23e0950d448a5..4ff1d48beabed 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -14,7 +14,3 @@ area/documentation: CDK: - airbyte-cdk/* - airbyte-cdk/**/* - -normalization: - - airbyte-integrations/bases/base-normalization/* - - airbyte-integrations/bases/base-normalization/**/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1170125df75e8..6d87ae30cb2d4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,8 +6,6 @@ exclude: | ^.*?/node_modules/.*$| ^.*?/charts/.*$| - ^airbyte-integrations/bases/base-normalization/.*$| - ^.*?/normalization_test_output/.*$| ^.*?/pnpm-lock\.yaml$| ^.*?/source-amplitude/unit_tests/api_data/zipped\.json$| diff --git a/airbyte-integrations/bases/base-java/.dockerignore b/airbyte-integrations/bases/base-java/.dockerignore deleted file mode 100644 index 70cd13cb50b78..0000000000000 --- a/airbyte-integrations/bases/base-java/.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -* -!Dockerfile -!build -!javabase.sh -!run_with_normalization.sh diff --git a/airbyte-integrations/bases/base-java/Dockerfile b/airbyte-integrations/bases/base-java/Dockerfile deleted file mode 100644 index d19438eab3f01..0000000000000 --- a/airbyte-integrations/bases/base-java/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -### WARNING ### -# The Java connector Dockerfiles will soon be deprecated. -# This Dockerfile is not used to build the connector image we publish to DockerHub. -# The new logic to build the connector image is declared with Dagger here: -# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649 - -# If you need to add a custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder. -# Please reach out to the Connectors Operations team if you have any question. -ARG JDK_VERSION=17.0.8 -FROM amazoncorretto:${JDK_VERSION} -COPY --from=airbyte/integration-base:dev /airbyte /airbyte - -RUN yum update -y && yum install -y tar openssl && yum clean all - -WORKDIR /airbyte - -# Add the Datadog Java APM agent -ADD https://dtdg.co/latest-java-tracer dd-java-agent.jar - -COPY javabase.sh . -COPY run_with_normalization.sh . 
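Annotation: the ENV block that follows declares one AIRBYTE_*_CMD variable per Airbyte protocol command, which the /airbyte/base.sh entrypoint dispatches on. A rough bash sketch of such a dispatcher (an assumption for illustration only — not the actual base.sh from airbyte/integration-base, whose case labels and quoting may differ):

#!/usr/bin/env bash
# Hypothetical sketch: dispatch the first argument (spec/check/discover/read/write)
# to the matching AIRBYTE_*_CMD declared in the Dockerfile below.
set -e
cmd="$1"; shift
case "$cmd" in
  spec)     eval "$AIRBYTE_SPEC_CMD" '"$@"' ;;
  check)    eval "$AIRBYTE_CHECK_CMD" '"$@"' ;;
  discover) eval "$AIRBYTE_DISCOVER_CMD" '"$@"' ;;
  read)     eval "$AIRBYTE_READ_CMD" '"$@"' ;;
  write)    eval "$AIRBYTE_WRITE_CMD" '"$@"' ;;
  *)        echo "Unknown Airbyte command: $cmd" >&2; exit 1 ;;
esac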
-
-# airbyte base commands
-ENV AIRBYTE_SPEC_CMD "/airbyte/javabase.sh --spec"
-ENV AIRBYTE_CHECK_CMD "/airbyte/javabase.sh --check"
-ENV AIRBYTE_DISCOVER_CMD "/airbyte/javabase.sh --discover"
-ENV AIRBYTE_READ_CMD "/airbyte/javabase.sh --read"
-ENV AIRBYTE_WRITE_CMD "/airbyte/javabase.sh --write"
-
-ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
-ENTRYPOINT ["/airbyte/base.sh"]
-
-LABEL io.airbyte.version=0.1.2
-LABEL io.airbyte.name=airbyte/integration-base-java
diff --git a/airbyte-integrations/bases/base-java/build.gradle b/airbyte-integrations/bases/base-java/build.gradle
deleted file mode 100644
index 0c2de175e2cc9..0000000000000
--- a/airbyte-integrations/bases/base-java/build.gradle
+++ /dev/null
@@ -1,3 +0,0 @@
-plugins {
-    id 'airbyte-docker-legacy'
-}
diff --git a/airbyte-integrations/bases/base-java/javabase.sh b/airbyte-integrations/bases/base-java/javabase.sh
deleted file mode 100755
index b5fc9ab7166c2..0000000000000
--- a/airbyte-integrations/bases/base-java/javabase.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-# If IS_CAPTURE_HEAP_DUMP_ON_ERROR is set to true, capture a heap dump on OutOfMemoryError.
-if [[ $IS_CAPTURE_HEAP_DUMP_ON_ERROR = true ]]; then
-
-  arrayOfSupportedConnectors=("source-postgres" "source-mssql" "source-mysql" )
-
-  # The heap dump is captured only when a Java-based connector fails with an OutOfMemoryError.
-  if [[ " ${arrayOfSupportedConnectors[*]} " =~ " $APPLICATION " ]]; then
-    JAVA_OPTS=$JAVA_OPTS" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/dump.hprof"
-    export JAVA_OPTS
-    echo "Added JAVA_OPTS=$JAVA_OPTS"
-    echo "APPLICATION=$APPLICATION"
-  fi
-fi
-# 30781 - Allocate 32KB for the log4j appender buffer to ensure that each line is logged in a single println.
-JAVA_OPTS=$JAVA_OPTS" -Dlog4j.encoder.byteBufferSize=32768 -Dlog4j2.configurationFile=log4j2.xml"
-# Needed because we make ThreadLocal.get(Thread) accessible in IntegrationRunner.stopOrphanedThreads.
-JAVA_OPTS=$JAVA_OPTS" --add-opens=java.base/java.lang=ALL-UNNAMED"
-# Tell jooq to be quiet (https://stackoverflow.com/questions/28272284/how-to-disable-jooqs-self-ad-message-in-3-4)
-JAVA_OPTS=$JAVA_OPTS" -Dorg.jooq.no-logo=true -Dorg.jooq.no-tips=true"
-export JAVA_OPTS
-
-# Wrap the run script in a script so that we can lazily evaluate the value of APPLICATION. APPLICATION is
-# set by the Dockerfile that inherits base-java, so it cannot be evaluated when base-java is built.
-# We also need to make sure that stdin of this script is piped to the stdin of the Java application.
-if [[ $A = --write ]]; then
-  cat <&0 | /airbyte/bin/"$APPLICATION" "$@"
-else
-  /airbyte/bin/"$APPLICATION" "$@"
-fi
diff --git a/airbyte-integrations/bases/base-java/run_with_normalization.sh b/airbyte-integrations/bases/base-java/run_with_normalization.sh
deleted file mode 100755
index 669763021803c..0000000000000
--- a/airbyte-integrations/bases/base-java/run_with_normalization.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-# Intentionally no set -e, because we want to run normalization even if the destination fails
-set -o pipefail
-
-/airbyte/base.sh $@
-destination_exit_code=$?
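Annotation: run_with_normalization.sh deliberately skips set -e so the dbt step still runs after a destination failure, then exits with whichever process failed first — exactly the logic the remainder of this hunk implements. A condensed sketch of that pattern (run_normalization is a hypothetical stand-in for the real dbt invocation through /airbyte/entrypoint.sh):

#!/bin/bash
set -o pipefail

run_normalization() {
  # Stand-in for the real in-connector normalization call.
  return 0
}

/airbyte/base.sh "$@"
destination_exit_code=$?

run_normalization
normalization_exit_code=$?

# Surface the destination's failure first, then normalization's, else succeed.
if [ "$destination_exit_code" -ne 0 ]; then
  exit "$destination_exit_code"
elif [ "$normalization_exit_code" -ne 0 ]; then
  exit "$normalization_exit_code"
fi
exit 0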
-echo '{"type": "LOG","log":{"level":"INFO","message":"Destination process done (exit code '"$destination_exit_code"')"}}' - -# store original args -args=$@ - -while [ $# -ne 0 ]; do - case "$1" in - --config) - CONFIG_FILE="$2" - shift 2 - ;; - *) - # move on - shift - ;; - esac -done - -# restore original args after shifts -set -- $args - -USE_1S1T_FORMAT="false" -if [[ -s "$CONFIG_FILE" ]]; then - USE_1S1T_FORMAT=$(jq -r '.use_1s1t_format' "$CONFIG_FILE") -fi - -if test "$1" != 'write' -then - normalization_exit_code=0 -elif test "$NORMALIZATION_TECHNIQUE" = 'LEGACY' && test "$USE_1S1T_FORMAT" != "true" -then - echo '{"type": "LOG","log":{"level":"INFO","message":"Starting in-connector normalization"}}' - # Normalization tries to create this file from the connector config and crashes if it already exists - # so just nuke it and let normalization recreate it. - # Use -f to avoid error if it doesn't exist, since it's only created for certain SSL modes. - rm -f ca.crt - # the args in a write command are `write --catalog foo.json --config bar.json` - # so if we remove the `write`, we can just pass the rest directly into normalization - /airbyte/entrypoint.sh run ${@:2} --integration-type $AIRBYTE_NORMALIZATION_INTEGRATION | java -cp "/airbyte/lib/*" io.airbyte.cdk.integrations.destination.normalization.NormalizationLogParser - normalization_exit_code=$? - echo '{"type": "LOG","log":{"level":"INFO","message":"In-connector normalization done (exit code '"$normalization_exit_code"')"}}' -else - echo '{"type": "LOG","log":{"level":"INFO","message":"Skipping in-connector normalization"}}' - normalization_exit_code=0 -fi - -if test $destination_exit_code -ne 0 -then - exit $destination_exit_code -elif test $normalization_exit_code -ne 0 -then - exit $normalization_exit_code -else - exit 0 -fi diff --git a/airbyte-integrations/bases/base-normalization/.dockerignore b/airbyte-integrations/bases/base-normalization/.dockerignore deleted file mode 100644 index 1af2d8606be8f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/.dockerignore +++ /dev/null @@ -1,13 +0,0 @@ -* -!Dockerfile -!entrypoint.sh -!build/sshtunneling.sh -!setup.py -!normalization -!dbt-project-template -!dbt-project-template-mssql -!dbt-project-template-mysql -!dbt-project-template-oracle -!dbt-project-template-clickhouse -!dbt-project-template-snowflake -!dbt-project-template-redshift diff --git a/airbyte-integrations/bases/base-normalization/.gitignore b/airbyte-integrations/bases/base-normalization/.gitignore deleted file mode 100644 index 7994f50ee6bea..0000000000000 --- a/airbyte-integrations/bases/base-normalization/.gitignore +++ /dev/null @@ -1,51 +0,0 @@ -build/ -logs/ -dbt-project-template/models/generated/ -dbt-project-template/test_output.log -dbt_modules/ -secrets/ -dist/ - -integration_tests/normalization_test_output/*/*/macros -integration_tests/normalization_test_output/*/*/tests -integration_tests/normalization_test_output/**/*.json -integration_tests/normalization_test_output/**/*.log -integration_tests/normalization_test_output/**/*.md -integration_tests/normalization_test_output/**/*.sql -integration_tests/normalization_test_output/**/*.yml -!integration_tests/normalization_test_output/**/*dbt_project.yml -!integration_tests/normalization_test_output/**/generated/sources.yml - -# We keep a minimal/restricted subset of sql files for all destinations to avoid noise in diff -# Simple Streams -!integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql 
-!integration_tests/normalization_test_output/**/DEDUP_EXCHANGE_RATE*.sql -!integration_tests/normalization_test_output/**/exchange_rate.sql -!integration_tests/normalization_test_output/**/EXCHANGE_RATE.sql -!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql -# Nested Streams -# Parent table -!integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql -!integration_tests/normalization_test_output/**/nested_stream_with*_names_scd.sql -!integration_tests/normalization_test_output/**/nested_stream_with*_names.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_AB*.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_SCD.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES.sql -# Nested table -!integration_tests/normalization_test_output/**/nested_stream_with_*_partition_ab1.sql -!integration_tests/normalization_test_output/**/nested_stream_with_*_data_ab1.sql -!integration_tests/normalization_test_output/**/nested_stream_with*_partition_scd.sql -!integration_tests/normalization_test_output/**/nested_stream_with*_data_scd.sql -!integration_tests/normalization_test_output/**/nested_stream_with*_partition.sql -!integration_tests/normalization_test_output/**/nested_stream_with*_data.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_PARTITION_AB1.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_DATA_AB1.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION_SCD.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA_SCD.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION.sql -!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA.sql - -# but we keep all sql files for Postgres -!integration_tests/normalization_test_output/postgres/**/*.sql -integration_tests/normalization_test_output/postgres/**/dbt_data_tests -integration_tests/normalization_test_output/postgres/**/dbt_schema_tests diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile deleted file mode 100644 index c0ee635f30459..0000000000000 --- a/airbyte-integrations/bases/base-normalization/Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . 
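Annotation: the Cython pin above works around yaml/pyyaml#601 — pyyaml 5.4's source build breaks under Cython 3, and --no-build-isolation makes pip compile against the pre-installed Cython<3 instead of pulling a fresh Cython 3 into an isolated PEP 517 build environment. Reproduced in isolation (the split into two commands is illustrative; the Dockerfile runs them as one):

# Workaround for https://github.com/yaml/pyyaml/issues/601: install an older
# Cython first, then build pyyaml 5.4 against it rather than an isolated env.
pip install "Cython<3.0"
pip install "pyyaml==5.4" --no-build-isolation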
- -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.version=0.4.3 -LABEL io.airbyte.name=airbyte/normalization diff --git a/airbyte-integrations/bases/base-normalization/build.gradle b/airbyte-integrations/bases/base-normalization/build.gradle deleted file mode 100644 index 4cc45316ef92d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/build.gradle +++ /dev/null @@ -1,57 +0,0 @@ -plugins { - id 'airbyte-docker-legacy' - id 'airbyte-python' -} - -dependencies { - testFixtures(project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies')) -} - -// we need to access the sshtunneling script from airbyte-workers for ssh support -def copySshScript = tasks.register('copySshScript', Copy) { - from "${project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').buildDir}/resources/testFixtures" - into "${buildDir}" - include "sshtunneling.sh" -} -copySshScript.configure { - dependsOn project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').tasks.named('processTestFixturesResources') -} - -// make sure the copy task above worked (if it fails, it fails silently annoyingly) -def checkSshScriptCopy = tasks.register('checkSshScriptCopy') { - doFirst { - assert file("${buildDir}/sshtunneling.sh").exists() : "Copy of sshtunneling.sh failed." - } -} -checkSshScriptCopy.configure { - dependsOn copySshScript -} - -def generate = tasks.register('generate') -generate.configure { - dependsOn checkSshScriptCopy -} - -tasks.named('check').configure { - dependsOn generate -} - -tasks.named("jar").configure { - dependsOn copySshScript -} - -[ - 'bigquery', - 'mysql', - 'postgres', - 'redshift', - 'snowflake', - 'oracle', - 'mssql', - 'clickhouse', - 'tidb', -].each {destinationName -> - tasks.matching { it.name == 'integrationTestPython' }.configureEach { - dependsOn project(":airbyte-integrations:connectors:destination-$destinationName").tasks.named('assemble') - } -} diff --git a/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile deleted file mode 100644 index 18005ea89872a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -FROM ghcr.io/dbt-labs/dbt-core:1.3.1 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . 
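Annotation: the build.gradle earlier in this hunk copies sshtunneling.sh out of the CDK's test fixtures and then asserts the file exists, because a misconfigured Copy task fails silently. A shell equivalent of that copy-and-verify step (a sketch; paths follow the gradle snippet and the variable name is illustrative):

# Mirror the copySshScript/checkSshScriptCopy gradle tasks, failing loudly.
cdk_build="airbyte-cdk/java/airbyte-cdk/airbyte-cdk-dependencies/build"
cp "$cdk_build/resources/testFixtures/sshtunneling.sh" build/
[ -f build/sshtunneling.sh ] || { echo "Copy of sshtunneling.sh failed." >&2; exit 1; }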
- -WORKDIR /airbyte/normalization_code/dbt-template/ -RUN pip install "dbt-clickhouse>=1.4.0" -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-clickhouse diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml deleted file mode 100755 index b6033fcb69544..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml +++ /dev/null @@ -1,65 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: true - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - # ephemeral materialization isn't supported in ClickHouse yet - +materialized: view - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - # schema change test isn't supported in ClickHouse yet - +on_schema_change: "ignore" - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml deleted file mode 100755 index 33b4edd58c8c6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. 
- -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/dbt_project.yml deleted file mode 100755 index 7631ef356dc92..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/dbt_project.yml +++ /dev/null @@ -1,63 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/packages.yml deleted file mode 100755 index 33b4edd58c8c6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. 
- -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/dbt_project.yml deleted file mode 100755 index 8ed082f367749..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/dbt_project.yml +++ /dev/null @@ -1,61 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -vars: - dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/packages.yml deleted file mode 100755 index 33b4edd58c8c6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. 
- -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/dbt_project.yml deleted file mode 100755 index 7116e6dc63d2e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/dbt_project.yml +++ /dev/null @@ -1,63 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization"s -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won"t need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - # incremental is not enabled for MySql yet - #+materialized: incremental - +materialized: table - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -vars: - dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/packages.yml deleted file mode 100755 index 33b4edd58c8c6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. 
- -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/dbt_project.yml deleted file mode 100755 index 7ad95ea5f9414..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/dbt_project.yml +++ /dev/null @@ -1,61 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `source-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -source-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -data-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: false - schema: false - identifier: false - -# You can define configurations for models in the `source-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - # incremental is not enabled for Oracle yet - #+materialized: incremental - +materialized: table - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -vars: - dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/packages.yml deleted file mode 100755 index 13d4e69a45cb7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. 
- -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.6.4 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-redshift/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-redshift/dbt_project.yml deleted file mode 100755 index c17ac179bd600..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-redshift/dbt_project.yml +++ /dev/null @@ -1,66 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - +transient: false - # https://docs.aws.amazon.com/redshift/latest/dg/super-configurations.html - +pre-hook: "SET enable_case_sensitive_identifier to TRUE" - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml deleted file mode 100644 index 2e807c5e19bae..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml +++ /dev/null @@ -1,64 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. 
A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - +transient: false - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/dbt_project.yml deleted file mode 100755 index 497a4f592e3f0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/dbt_project.yml +++ /dev/null @@ -1,61 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization"s -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won"t need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -vars: - dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/packages.yml deleted file mode 100755 index 33b4edd58c8c6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. - -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/README.md b/airbyte-integrations/bases/base-normalization/dbt-project-template/README.md deleted file mode 100644 index 13e812383e92d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/README.md +++ /dev/null @@ -1,19 +0,0 @@ -## Installing dbt - -1. Activate your venv and run `pip3 install dbt` -1. Copy `airbyte-normalization/sample_files/profiles.yml` over to `~/.dbt/profiles.yml` -1. Edit to configure your profiles accordingly - -## Running dbt - -1. `cd airbyte-normalization` -1. You can now run dbt commands, to check the setup is fine: `dbt debug` -1. To build the dbt tables in your warehouse: `dbt run` - -## Running dbt from Airbyte generated config - -1. You can also change directory (`cd /tmp/dev_root/workspace/1/0/normalize` for example) to one of the workspace generated by Airbyte within one of the `normalize` folder. -1. You should find `profiles.yml` and a bunch of other dbt files/folders created there. -1. To check everything is setup properly: `dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)` -1. You can modify the `.sql` files and run `dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)` too -1. 
You can inspect compiled dbt `.sql` files before they are run in the destination engine in `normalize/build/compiled` or `normalize/build/run` folders diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template/dbt_project.yml deleted file mode 100755 index 7631ef356dc92..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/dbt_project.yml +++ /dev/null @@ -1,63 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/clean_tmp_tables.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/clean_tmp_tables.sql deleted file mode 100644 index 46e2328745f1a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/clean_tmp_tables.sql +++ /dev/null @@ -1,19 +0,0 @@ -{% macro clean_tmp_tables(schemas) -%} - {{ adapter.dispatch('clean_tmp_tables')(schemas) }} -{%- endmacro %} - --- default -{% macro default__clean_tmp_tables(schemas) -%} - {% do exceptions.warn("\tINFO: CLEANING TEST LEFTOVERS IS NOT IMPLEMENTED FOR THIS DESTINATION. 
CONSIDER REMOVING TEST TABLES MANUALLY.\n") %}
-{%- endmacro %}
-
--- for redshift
-{% macro redshift__clean_tmp_tables(schemas) %}
-    {%- for tmp_schema in schemas -%}
-    {% do log("\tDROP SCHEMA IF EXISTS " ~ tmp_schema, info=True) %}
-    {%- set drop_query -%}
-        drop schema if exists {{ tmp_schema }} cascade;
-    {%- endset -%}
-    {%- do run_query(drop_query) -%}
-    {%- endfor -%}
-{% endmacro %}
\ No newline at end of file
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql
deleted file mode 100644
index 6180675674b7f..0000000000000
--- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql
+++ /dev/null
@@ -1,173 +0,0 @@
-{#
-    Adapter Macros for the following functions:
-    - Bigquery: unnest() -> https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays#flattening-arrays-and-repeated-fields
-    - Snowflake: flatten() -> https://docs.snowflake.com/en/sql-reference/functions/flatten.html
-    - Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
-    - postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
-    - MSSQL: openjson() -> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
-    - ClickHouse: ARRAY JOIN -> https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
-#}
-
-{# cross_join_unnest ------------------------------------------------- #}
-
-{% macro cross_join_unnest(stream_name, array_col) -%}
-    {{ adapter.dispatch('cross_join_unnest')(stream_name, array_col) }}
-{%- endmacro %}
-
-{% macro default__cross_join_unnest(stream_name, array_col) -%}
-    {% do exceptions.warn("Undefined macro cross_join_unnest for this destination engine") %}
-{%- endmacro %}
-
-{% macro bigquery__cross_join_unnest(stream_name, array_col) -%}
-    cross join unnest({{ array_col }}) as {{ array_col }}
-{%- endmacro %}
-
-{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
-    ARRAY JOIN {{ array_col }}
-{%- endmacro %}
-
-{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
-    {% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
-{%- endmacro %}
-
-{% macro postgres__cross_join_unnest(stream_name, array_col) -%}
-    cross join jsonb_array_elements(
-        case jsonb_typeof({{ array_col }})
-        when 'array' then {{ array_col }}
-        else '[]' end
-    ) as _airbyte_nested_data
-{%- endmacro %}
-
-{% macro mysql__cross_join_unnest(stream_name, array_col) -%}
-    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
-{%- endmacro %}
-
-{% macro tidb__cross_join_unnest(stream_name, array_col) -%}
-    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
-{%- endmacro %}
-
-{% macro duckdb__cross_join_unnest(stream_name, array_col) -%}
-    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
-{%- endmacro %}
-
-{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
-    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
-{%- endmacro %}
-
-{% macro snowflake__cross_join_unnest(stream_name, array_col) -%}
-    cross join table(flatten({{ array_col }})) as {{ array_col }}
-{%- endmacro %}
-
-{% macro sqlserver__cross_join_unnest(stream_name, array_col) -%}
-{#
https://docs.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15#option-1---openjson-with-the-default-output #} - CROSS APPLY ( - SELECT [value] = CASE - WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value])) - WHEN [type] = 5 THEN [value] - END - FROM OPENJSON({{ array_col }}) - ) AS {{ array_col }} -{%- endmacro %} - -{# unnested_column_value -- this macro is related to unnest_cte #} - -{% macro unnested_column_value(column_col) -%} - {{ adapter.dispatch('unnested_column_value')(column_col) }} -{%- endmacro %} - -{% macro default__unnested_column_value(column_col) -%} - {{ column_col }} -{%- endmacro %} - -{% macro postgres__unnested_column_value(column_col) -%} - _airbyte_nested_data -{%- endmacro %} - -{% macro snowflake__unnested_column_value(column_col) -%} - {{ column_col }}.value -{%- endmacro %} - -{% macro redshift__unnested_column_value(column_col) -%} - _airbyte_nested_data -{%- endmacro %} - -{% macro mysql__unnested_column_value(column_col) -%} - _airbyte_nested_data -{%- endmacro %} - -{% macro tidb__unnested_column_value(column_col) -%} - _airbyte_nested_data -{%- endmacro %} - -{% macro duckdb__unnested_column_value(column_col) -%} - _airbyte_nested_data -{%- endmacro %} - -{% macro oracle__unnested_column_value(column_col) -%} - {{ column_col }} -{%- endmacro %} - -{% macro sqlserver__unnested_column_value(column_col) -%} - {# unnested array/sub_array will be located in `value` column afterwards, we need to address to it #} - {{ column_col }}.value -{%- endmacro %} - -{# unnest_cte ------------------------------------------------- #} - -{% macro unnest_cte(from_table, stream_name, column_col) -%} - {{ adapter.dispatch('unnest_cte')(from_table, stream_name, column_col) }} -{%- endmacro %} - -{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %} - -{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%} - {# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #} - with joined as ( - select - table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid, - _airbyte_nested_data - from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data - ) -{%- endmacro %} - -{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%} - {%- if not execute -%} - {{ return('') }} - {% endif %} - - {%- call statement('max_json_array_length', fetch_result=True) -%} - with max_value as ( - select max(json_length({{ column_col }})) as max_number_of_items - from {{ from_table }} - ) - select - case when max_number_of_items is not null and max_number_of_items > 1 - then max_number_of_items - else 1 end as max_number_of_items - from max_value - {%- endcall -%} - - {%- set max_length = load_result('max_json_array_length') -%} - with numbers as ( - {{ dbt_utils.generate_series(max_length["data"][0][0]) }} - ), - joined as ( - select - _airbyte_{{ stream_name }}_hashid as _airbyte_hashid, - {# -- json_extract(column_col, '$[i][0]') as _airbyte_nested_data #} - json_extract({{ column_col }}, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data - from {{ from_table }} - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in {{ from_table }}.{{ column_col }} - where numbers.generated_number <= json_length({{ column_col }}) - ) -{%- endmacro %} - -{% macro tidb__unnest_cte(from_table, stream_name, column_col) -%} - 
{{ mysql__unnest_cte(from_table, stream_name, column_col) }}
-{%- endmacro %}
-
-{% macro duckdb__unnest_cte(from_table, stream_name, column_col) -%}
-    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
-{%- endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql
deleted file mode 100644
index aab42ca3b9640..0000000000000
--- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql
+++ /dev/null
@@ -1,36 +0,0 @@
-{#
-    concat in dbt 0.6.4 used to work fine for BigQuery, but the new implementation in 0.7.3 is less scalable (it cannot handle too many columns).
-    Therefore, we revert the implementation here and add versions for missing destinations.
-#}
-
-{% macro concat(fields) -%}
-    {{ adapter.dispatch('concat')(fields) }}
-{%- endmacro %}
-
-{% macro bigquery__concat(fields) -%}
-    {#-- concat() in BigQuery scales better with the number of columns than the '||' operator --#}
-    concat({{ fields|join(', ') }})
-{%- endmacro %}
-
-{% macro mysql__concat(fields) -%}
-    {#-- MySQL doesn't support the '||' operator as concatenation by default --#}
-    concat({{ fields|join(', ') }})
-{%- endmacro %}
-
-{% macro sqlserver__concat(fields) -%}
-    {#-- CONCAT() in SQL Server accepts from 2 to 254 arguments; we batch the main concat to overcome the limit. --#}
-    {% set concat_chunks = [] %}
-    {% for chunk in fields|batch(253) -%}
-        {% set _ = concat_chunks.append( "concat(" ~ chunk|join(', ') ~ ",'')" ) %}
-    {% endfor %}
-
-    concat({{ concat_chunks|join(', ') }}, '')
-{%- endmacro %}
-
-{% macro tidb__concat(fields) -%}
-    concat({{ fields|join(', ') }})
-{%- endmacro %}
-
-{% macro duckdb__concat(fields) -%}
-    concat({{ fields|join(', ') }})
-{%- endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/current_timestamp.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/current_timestamp.sql
deleted file mode 100644
index a9df34c9e4979..0000000000000
--- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/current_timestamp.sql
+++ /dev/null
@@ -1,7 +0,0 @@
-{% macro mysql__current_timestamp() %}
-    CURRENT_TIMESTAMP
-{% endmacro %}
-
-{% macro oracle__current_timestamp() %}
-    CURRENT_TIMESTAMP
-{% endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql
deleted file mode 100755
index 7f69c66f78df2..0000000000000
--- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql
+++ /dev/null
@@ -1,394 +0,0 @@
-{# json ------------------------------------------------- #}
-
-{%- macro type_json() -%}
-    {{ adapter.dispatch('type_json')() }}
-{%- endmacro -%}
-
-{% macro default__type_json() %}
-    string
-{% endmacro %}
-
-{%- macro redshift__type_json() -%}
-    super
-{%- endmacro -%}
-
-{% macro postgres__type_json() %}
-    jsonb
-{% endmacro %}
-
-{%- macro oracle__type_json() -%}
-    varchar2(4000)
-{%- endmacro -%}
-
-{% macro snowflake__type_json() %}
-    variant
-{% endmacro %}
-
-{%- macro mysql__type_json() -%}
-    json
-{%- endmacro -%}
-
-{%- macro sqlserver__type_json() -%}
-    NVARCHAR(max)
-{%- endmacro -%}
-
-{% macro clickhouse__type_json() %}
-    String
-{% endmacro %}
-
-{%- macro tidb__type_json() -%}
-    json
-{%- endmacro -%}
-
-{%- macro duckdb__type_json() -%}
-    json
-{%- endmacro -%}
-
-{# string ------------------------------------------------- #}
-
-{%- macro mysql__type_string() -%}
-    char
-{%- endmacro -%}
-
-{%- macro oracle__type_string() -%}
-    varchar2(4000)
-{%- endmacro -%}
-
-{% macro sqlserver__type_string() %}
-    NVARCHAR(max)
-{%- endmacro -%}
-
-{%- macro clickhouse__type_string() -%}
-    String
-{%- endmacro -%}
-
-{#-- TODO: Remove this macro when dbt issue regarding unlimited varchars on postgres is resolved (https://github.com/dbt-labs/dbt-core/issues/5238) and we've upgraded to the latest version of dbt --#}
-{%- macro postgres__type_string() -%}
-    text
-{%- endmacro -%}
-
-{%- macro tidb__type_string() -%}
-    char(1000)
-{%- endmacro -%}
-
-{%- macro duckdb__type_string() -%}
-    VARCHAR
-{%- endmacro -%}
-
-{# float ------------------------------------------------- #}
-{% macro mysql__type_float() %}
-    float
-{% endmacro %}
-
-{% macro oracle__type_float() %}
-    float
-{% endmacro %}
-
-{% macro clickhouse__type_float() %}
-    Float64
-{% endmacro %}
-
-{% macro tidb__type_float() %}
-    float
-{% endmacro %}
-
-{% macro duckdb__type_float() %}
-    DOUBLE
-{% endmacro %}
-
-{# int ------------------------------------------------- #}
-{% macro default__type_int() %}
-    int
-{% endmacro %}
-
-{% macro mysql__type_int() %}
-    signed
-{% endmacro %}
-
-{% macro oracle__type_int() %}
-    int
-{% endmacro %}
-
-{% macro clickhouse__type_int() %}
-    INT
-{% endmacro %}
-
-{% macro tidb__type_int() %}
-    signed
-{% endmacro %}
-
-{% macro duckdb__type_int() %}
-    INTEGER
-{% endmacro %}
-
-{# bigint ------------------------------------------------- #}
-{% macro mysql__type_bigint() %}
-    signed
-{% endmacro %}
-
-{% macro oracle__type_bigint() %}
-    numeric
-{% endmacro %}
-
-{% macro clickhouse__type_bigint() %}
-    BIGINT
-{% endmacro %}
-
-{% macro tidb__type_bigint() %}
-    signed
-{% endmacro %}
-
-{% macro duckdb__type_bigint() %}
-    BIGINT
-{% endmacro %}
-
-{# numeric ------------------------------------------------- --#}
-{% macro mysql__type_numeric() %}
-    float
-{% endmacro %}
-
-{% macro clickhouse__type_numeric() %}
-    Float64
-{% endmacro %}
-
-{% macro tidb__type_numeric() %}
-    float
-{% endmacro %}
-
-{% macro duckdb__type_numeric() %}
-    DOUBLE
-{% endmacro %}
-
-{# very_large_integer --------------------------------------- --#}
-{#
-Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field.
-Our type terminology unfortunately collides with DB terminology (i.e. "big_integer" means different things in different contexts),
-so this macro needs to be called very_large_integer.
-#} -{%- macro type_very_large_integer() -%} - {{ adapter.dispatch('type_very_large_integer')() }} -{%- endmacro -%} - -{% macro default__type_very_large_integer() %} - numeric -{% endmacro %} - -{% macro snowflake__type_very_large_integer() %} - numeric -{% endmacro %} - -{% macro mysql__type_very_large_integer() %} - decimal(38, 0) -{% endmacro %} - -{% macro clickhouse__type_very_large_integer() %} - decimal128(0) -{% endmacro %} - -{% macro tidb__type_very_large_integer() %} - decimal(38, 0) -{% endmacro %} - -{% macro duckdb__type_very_large_integer() %} - DECIMAL(38, 0) -{% endmacro %} - -{# timestamp ------------------------------------------------- --#} -{% macro mysql__type_timestamp() %} - time -{% endmacro %} - -{%- macro sqlserver__type_timestamp() -%} - {#-- in TSQL timestamp is really datetime --#} - {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#} - datetime -{%- endmacro -%} - -{% macro clickhouse__type_timestamp() %} - DateTime64 -{% endmacro %} - -{% macro tidb__type_timestamp() %} - time -{% endmacro %} - -{% macro duckdb__type_timestamp() %} - TIMESTAMP -{% endmacro %} - -{# timestamp with time zone ------------------------------------------------- #} - -{%- macro type_timestamp_with_timezone() -%} - {{ adapter.dispatch('type_timestamp_with_timezone')() }} -{%- endmacro -%} - -{% macro default__type_timestamp_with_timezone() %} - timestamp with time zone -{% endmacro %} - -{% macro bigquery__type_timestamp_with_timezone() %} - timestamp -{% endmacro %} - -{#-- MySQL doesn't allow the cast operation with nullif to work with DATETIME and doesn't support storing timezones, so we have to use char --#} -{#-- https://bugs.mysql.com/bug.php?id=77805 --#} -{%- macro mysql__type_timestamp_with_timezone() -%} - char(1024) -{%- endmacro -%} - -{% macro oracle__type_timestamp_with_timezone() %} - varchar2(4000) -{% endmacro %} - -{%- macro sqlserver__type_timestamp_with_timezone() -%} - datetimeoffset -{%- endmacro -%} - -{% macro redshift__type_timestamp_with_timezone() %} - TIMESTAMPTZ -{% endmacro %} - -{% macro clickhouse__type_timestamp_with_timezone() %} - DateTime64 -{% endmacro %} - -{%- macro tidb__type_timestamp_with_timezone() -%} - char(1000) -{%- endmacro -%} - -{%- macro duckdb__type_timestamp_with_timezone() -%} - TIMESTAMPTZ -{%- endmacro -%} - -{# timestamp without time zone ------------------------------------------------- #} - -{%- macro type_timestamp_without_timezone() -%} - {{ adapter.dispatch('type_timestamp_without_timezone')() }} -{%- endmacro -%} - -{% macro default__type_timestamp_without_timezone() %} - timestamp -{% endmacro %} - -{%- macro sqlserver__type_timestamp_without_timezone() -%} - {#-- in TSQL timestamp is really datetime or datetime2 --#} - {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#} - datetime2 -{%- endmacro -%} - -{% macro bigquery__type_timestamp_without_timezone() %} - datetime -{% endmacro %} - -{% macro oracle__type_timestamp_without_timezone() %} - varchar2(4000) -{% endmacro %} - -{% macro redshift__type_timestamp_without_timezone() %} - TIMESTAMP -{% endmacro %} - -{% macro tidb__type_timestamp_without_timezone() %} - datetime -{% endmacro %} - -{% macro duckdb__type_timestamp_without_timezone() %} - TIMESTAMP -{% endmacro %} - -{# time without time zone ------------------------------------------------- #} - -{%- macro 
type_time_without_timezone() -%} - {{ adapter.dispatch('type_time_without_timezone')() }} -{%- endmacro -%} - -{% macro default__type_time_without_timezone() %} - time -{% endmacro %} - -{% macro oracle__type_time_without_timezone() %} - varchar2(4000) -{% endmacro %} - -{% macro redshift__type_time_without_timezone() %} - TIME -{% endmacro %} - -{% macro clickhouse__type_time_without_timezone() %} - String -{% endmacro %} - -{% macro tidb__type_time_without_timezone() %} - time -{% endmacro %} - -{% macro duckdb__type_time_without_timezone() %} - TIMESTAMP -{% endmacro %} - -{# time with time zone ------------------------------------------------- #} - -{%- macro type_time_with_timezone() -%} - {{ adapter.dispatch('type_time_with_timezone')() }} -{%- endmacro -%} - -{% macro default__type_time_with_timezone() %} - time with time zone -{% endmacro %} - -{%- macro mysql__type_time_with_timezone() -%} - char(1024) -{%- endmacro -%} - -{%- macro sqlserver__type_time_with_timezone() -%} - NVARCHAR(max) -{%- endmacro -%} - -{% macro bigquery__type_time_with_timezone() %} - STRING -{% endmacro %} - -{% macro oracle__type_time_with_timezone() %} - varchar2(4000) -{% endmacro %} - -{% macro snowflake__type_time_with_timezone() %} - varchar -{% endmacro %} - -{% macro redshift__type_time_with_timezone() %} - TIMETZ -{% endmacro %} - -{% macro clickhouse__type_time_with_timezone() %} - String -{% endmacro %} - -{%- macro tidb__type_time_with_timezone() -%} - char(1000) -{%- endmacro -%} - -{%- macro duckdb__type_time_with_timezone() -%} - TIMESTAMPTZ -{%- endmacro -%} -{# date ------------------------------------------------- #} - -{%- macro type_date() -%} - {{ adapter.dispatch('type_date')() }} -{%- endmacro -%} - -{% macro default__type_date() %} - date -{% endmacro %} - -{% macro oracle__type_date() %} - varchar2(4000) -{% endmacro %} - -{%- macro sqlserver__type_date() -%} - date -{%- endmacro -%} - -{% macro clickhouse__type_date() %} - Date32 -{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/except.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/except.sql deleted file mode 100644 index a0f0c159dc214..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/except.sql +++ /dev/null @@ -1,7 +0,0 @@ -{% macro mysql__except() %} - {% do exceptions.warn("MySQL does not support EXCEPT operator") %} -{% endmacro %} - -{% macro oracle__except() %} - minus -{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/hash.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/hash.sql deleted file mode 100644 index 184888794b9f0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/hash.sql +++ /dev/null @@ -1,5 +0,0 @@ -{# macro for converting a hash to varchar #} - -{% macro sqlserver__hash(field) -%} - convert(varchar(32), HashBytes('md5', coalesce(cast({{field}} as {{dbt_utils.type_string()}}), '')), 2) -{%- endmacro %} \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql deleted file mode 100644 index cbbfbc4510196..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql +++ /dev/null @@ -1,317 +0,0 @@ -{# - Adapter Macros for the following functions: - - Bigquery: JSON_EXTRACT(json_string_expr, json_path_format) -> https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions - - Snowflake: JSON_EXTRACT_PATH_TEXT(<column>, '<path_name>') -> https://docs.snowflake.com/en/sql-reference/functions/json_extract_path_text.html - - Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html - - Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html - - MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html - - ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/ - - TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions - - DuckDB: json_extract(json, 'path') note: If path is a LIST, the result will be a LIST of JSON -> https://duckdb.org/docs/extensions/json -#} - -{# format_json_path -------------------------------------------------- #} -{% macro format_json_path(json_path_list) -%} - {{ adapter.dispatch('format_json_path')(json_path_list) }} -{%- endmacro %} - -{% macro default__format_json_path(json_path_list) -%} - {{ '.' ~ json_path_list|join('.') }} -{%- endmacro %} - -{% macro oracle__format_json_path(json_path_list) -%} - {{ '\'$."' ~ json_path_list|join('."') ~ '"\'' }} -{%- endmacro %} - -{# - BigQuery has different JSONPath syntax depending on which function you call. - Most of our macros use the "legacy" JSON functions, so this function uses - the legacy syntax. - - These paths look like: "$['foo']['bar']" -#} -{% macro bigquery__format_json_path(json_path_list) -%} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace('"', '\\"')) -%} {%- endif -%} - {%- endfor -%} - {{ '"$[\'' ~ str_list|join('\'][\'') ~ '\']"' }} -{%- endmacro %} - -{# - For macros which use the newer JSON functions, define a new_format_json_path - macro which generates the correct path syntax. 
- - These paths look like: '$."foo"."bar"' -#} -{% macro bigquery_new_format_json_path(json_path_list) -%} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace('\'', '\\\'')) -%} {%- endif -%} - {%- endfor -%} - {{ '\'$."' ~ str_list|join('"."') ~ '"\'' }} -{%- endmacro %} - -{% macro postgres__format_json_path(json_path_list) -%} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%} - {%- endfor -%} - {{ "'" ~ str_list|join("','") ~ "'" }} -{%- endmacro %} - -{% macro mysql__format_json_path(json_path_list) -%} - {# -- '$."x"."y"."z"' #} - {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }} -{%- endmacro %} - -{% macro redshift__format_json_path(json_path_list) -%} - {%- set quote = '"' -%} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%} - {%- endfor -%} - {{ quote ~ str_list|join(quote + "," + quote) ~ quote }} -{%- endmacro %} - -{% macro snowflake__format_json_path(json_path_list) -%} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace("'", "''").replace('"', '""')) -%} {%- endif -%} - {%- endfor -%} - {{ "'\"" ~ str_list|join('"."') ~ "\"'" }} -{%- endmacro %} - -{% macro sqlserver__format_json_path(json_path_list) -%} - {# -- '$."x"."y"."z"' #} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%} - {%- endfor -%} - {{ "'$.\"" ~ str_list|join(".") ~ "\"'" }} -{%- endmacro %} - -{% macro clickhouse__format_json_path(json_path_list) -%} - {%- set str_list = [] -%} - {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%} - {%- endfor -%} - {{ "'" ~ str_list|join("','") ~ "'" }} -{%- endmacro %} - -{% macro tidb__format_json_path(json_path_list) -%} - {# -- '$."x"."y"."z"' #} - {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }} -{%- endmacro %} - -{% macro duckdb__format_json_path(json_path_list) -%} - {# -- '$."x"."y"."z"' #} - {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }} -{%- endmacro %} - -{# json_extract ------------------------------------------------- #} - -{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {{ adapter.dispatch('json_extract')(from_table, json_column, json_path_list, normalized_json_path) }} -{%- endmacro %} - -{% macro default__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - json_extract({{ from_table}}.{{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro oracle__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro bigquery__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% else %} - json_extract({{ from_table}}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% endif -%} -{%- endmacro %} - -{% macro postgres__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - jsonb_extract_path({{ json_column }}, 
{{ format_json_path(json_path_list) }}) - {% else %} - jsonb_extract_path({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}) - {% endif -%} -{%- endmacro %} - -{% macro mysql__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% else %} - json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% endif -%} -{%- endmacro %} - -{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() != '' -%} - {%- set json_column = from_table|string() + "." + json_column|string() -%} - {%- endif -%} - case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end -{%- endmacro %} - -{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }}) - {% else %} - get_path(parse_json({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }}) - {% endif -%} -{%- endmacro %} - -{% macro sqlserver__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }}) - {% else %} - JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }}) - {% endif -%} -{%- endmacro %} - -{% macro tidb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% else %} - json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% endif -%} -{%- endmacro %} - -{% macro duckdb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% else %} - json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) - {% endif -%} -{%- endmacro %} - -{# json_extract_scalar ------------------------------------------------- #} - -{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - {{ adapter.dispatch('json_extract_scalar')(json_column, json_path_list, normalized_json_path) }} -{%- endmacro %} - -{% macro default__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - json_extract_scalar({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro oracle__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro bigquery__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - json_extract_scalar({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro postgres__json_extract_scalar(json_column, json_path_list, 
normalized_json_path) -%} - jsonb_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro mysql__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }} RETURNING CHAR) -{%- endmacro %} - -{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end -{%- endmacro %} - -{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - to_varchar(get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})) -{%- endmacro %} - -{% macro sqlserver__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - json_value({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro tidb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - IF( - JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) = 'null', - NULL, - JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) - ) -{%- endmacro %} - -{% macro duckdb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} - json_extract_string({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{# json_extract_array ------------------------------------------------- #} - -{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%} - {{ adapter.dispatch('json_extract_array')(json_column, json_path_list, normalized_json_path) }} -{%- endmacro %} - -{% macro default__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_extract_array({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro oracle__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro bigquery__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_extract_array({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro postgres__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro mysql__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - {{ json_column }}.{{ format_json_path(json_path_list) }} -{%- endmacro %} - -{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro 
clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }}) -{%- endmacro %} - -{% macro tidb__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{% macro duckdb__json_extract_array(json_column, json_path_list, normalized_json_path) -%} - json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) -{%- endmacro %} - -{# json_extract_string_array ------------------------------------------------- #} - -{% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%} - {{ adapter.dispatch('json_extract_string_array')(json_column, json_path_list, normalized_json_path) }} -{%- endmacro %} - -{% macro default__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%} - {{ json_extract_array(json_column, json_path_list, normalized_json_path) }} -{%- endmacro %} - -{# -See https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_extract_string_array - -BigQuery does not allow NULL entries in REPEATED fields, so we replace those with literal "NULL" strings. -#} -{% macro bigquery__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%} - array( - select ifnull(x, "NULL") - from unnest(json_value_array({{ json_column }}, {{ bigquery_new_format_json_path(normalized_json_path) }})) as x - ) -{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql deleted file mode 100644 index 87862498cfc5f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql +++ /dev/null @@ -1,16 +0,0 @@ -{# quote ---------------------------------- #} -{% macro quote(column_name) -%} - {{ adapter.dispatch('quote')(column_name) }} -{%- endmacro %} - -{% macro default__quote(column_name) -%} - adapter.quote(column_name) -{%- endmacro %} - -{% macro oracle__quote(column_name) -%} - {{ '\"' ~ column_name ~ '\"'}} -{%- endmacro %} - -{% macro clickhouse__quote(column_name) -%} - {{ '\"' ~ column_name ~ '\"'}} -{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql deleted file mode 100644 index 9de2965409aad..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql +++ /dev/null @@ -1,25 +0,0 @@ -{# surrogate_key ---------------------------------- #} - -{% macro oracle__surrogate_key(field_list) -%} - ora_hash( - {%- for field in field_list %} - {% if not loop.last %} - {{ field }} || '~' || - {% else %} - {{ field }} - {% endif %} - {%- endfor %} - ) -{%- endmacro %} - -{% macro clickhouse__surrogate_key(field_list) -%} - assumeNotNull(hex(MD5( - {%- for field in field_list %} - {% if not loop.last %} - toString({{ field }}) || '~' || - {% else %} - toString({{ field }}) - {% endif %} - {%- endfor %} - ))) -{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/type_conversions.sql 
b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/type_conversions.sql deleted file mode 100644 index 90b2337ed3ba0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/type_conversions.sql +++ /dev/null @@ -1,105 +0,0 @@ - -{# boolean_to_string ------------------------------------------------- #} -{% macro boolean_to_string(boolean_column) -%} - {{ adapter.dispatch('boolean_to_string')(boolean_column) }} -{%- endmacro %} - -{% macro default__boolean_to_string(boolean_column) -%} - {{ boolean_column }} -{%- endmacro %} - -{% macro redshift__boolean_to_string(boolean_column) -%} - case when {{ boolean_column }} then 'true' else 'false' end -{%- endmacro %} - -{# array_to_string ------------------------------------------------- #} -{% macro array_to_string(array_column) -%} - {{ adapter.dispatch('array_to_string')(array_column) }} -{%- endmacro %} - -{% macro default__array_to_string(array_column) -%} - {{ array_column }} -{%- endmacro %} - -{% macro bigquery__array_to_string(array_column) -%} - array_to_string({{ array_column }}, "|", "") -{%- endmacro %} - -{% macro oracle__array_to_string(array_column) -%} - cast({{ array_column }} as varchar2(4000)) -{%- endmacro %} - -{% macro sqlserver__array_to_string(array_column) -%} - cast({{ array_column }} as {{dbt_utils.type_string()}}) -{%- endmacro %} - -{% macro redshift__array_to_string(array_column) -%} - json_serialize({{array_column}}) -{%- endmacro %} - -{# object_to_string ------------------------------------------------- #} -{% macro object_to_string(object_column) -%} - {{ adapter.dispatch('object_to_string')(object_column) }} -{%- endmacro %} - -{% macro default__object_to_string(object_column) -%} - {{ object_column }} -{%- endmacro %} - -{% macro redshift__object_to_string(object_column) -%} - json_serialize({{object_column}}) -{%- endmacro %} - -{# cast_to_boolean ------------------------------------------------- #} -{% macro cast_to_boolean(field) -%} - {{ adapter.dispatch('cast_to_boolean')(field) }} -{%- endmacro %} - -{% macro default__cast_to_boolean(field) -%} - cast({{ field }} as boolean) -{%- endmacro %} - -{# -- MySQL does not support the cast function converting a string directly to boolean (an alias of tinyint(1)); see https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast #} -{% macro mysql__cast_to_boolean(field) -%} - IF(lower({{ field }}) = 'true', true, false) -{%- endmacro %} - -{# TiDB does not support casting a string to boolean #} -{% macro tidb__cast_to_boolean(field) -%} - IF(lower({{ field }}) = 'true', true, false) -{%- endmacro %} - -{% macro duckdb__cast_to_boolean(field) -%} - cast({{ field }} as boolean) -{%- endmacro %} - -{% macro redshift__cast_to_boolean(field) -%} - cast({{ field }} as boolean) -{%- endmacro %} - -{# -- MS SQL Server does not support converting a string directly to boolean; it must be cast as bit #} -{% macro sqlserver__cast_to_boolean(field) -%} - cast({{ field }} as bit) -{%- endmacro %} - -{# -- ClickHouse does not support converting a string directly to Int8; it must go through int first #} -{% macro clickhouse__cast_to_boolean(field) -%} - IF(lower({{ field }}) = 'true', 1, 0) -{%- endmacro %} - -{# empty_string_to_null ------------------------------------------------- #} -{% macro empty_string_to_null(field) -%} - {{ return(adapter.dispatch('empty_string_to_null')(field)) }} -{%- endmacro %} - -{%- macro default__empty_string_to_null(field) -%} - nullif({{ field }}, '') -{%- 
endmacro %} - -{%- macro duckdb__empty_string_to_null(field) -%} - nullif(nullif({{ field }}, 'null'), '') -{%- endmacro %} - -{%- macro redshift__empty_string_to_null(field) -%} - nullif({{ field }}::varchar, '') -{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/get_custom_schema.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/get_custom_schema.sql deleted file mode 100644 index 77e83c7acd48f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/get_custom_schema.sql +++ /dev/null @@ -1,4 +0,0 @@ --- see https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas/#an-alternative-pattern-for-generating-schema-names -{% macro generate_schema_name(custom_schema_name, node) -%} - {{ generate_schema_name_for_env(custom_schema_name, node) }} -{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql deleted file mode 100644 index f3f4c12d75df6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql +++ /dev/null @@ -1,61 +0,0 @@ -{# - These macros control how incremental models are updated in Airbyte's normalization step - - get_max_normalized_cursor retrieves the value of the last normalized data - - incremental_clause controls the predicate to filter on new data to process incrementally -#} - -{% macro incremental_clause(col_emitted_at, tablename) -%} - {{ adapter.dispatch('incremental_clause')(col_emitted_at, tablename) }} -{%- endmacro %} - -{%- macro default__incremental_clause(col_emitted_at, tablename) -%} -{% if is_incremental() %} -and coalesce( - cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) > (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}), - {# -- if {{ col_emitted_at }} is NULL in either table, the previous comparison would evaluate to NULL, #} - {# -- so we coalesce and make sure the row is always returned for incremental processing instead #} - true) - {% endif %} -{%- endmacro -%} - -{# -- see https://on-systems.tech/113-beware-dbt-incremental-updates-against-snowflake-external-tables/ #} -{%- macro snowflake__incremental_clause(col_emitted_at, tablename) -%} -{% if is_incremental() %} - {% if get_max_normalized_cursor(col_emitted_at, tablename) %} -and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) > - cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }}) - {% endif %} -{% endif %} -{%- endmacro -%} - -{# -- see https://cloud.google.com/bigquery/docs/querying-partitioned-tables#best_practices_for_partition_pruning #} -{%- macro bigquery__incremental_clause(col_emitted_at, tablename) -%} -{% if is_incremental() %} - {% if get_max_normalized_cursor(col_emitted_at, tablename) %} -and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) > - cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }}) - {% endif %} -{% endif %} -{%- endmacro -%} - -{%- macro sqlserver__incremental_clause(col_emitted_at, tablename) -%} -{% if is_incremental() %} -and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}) is null - or cast({{ col_emitted_at }} as {{ 
type_timestamp_with_timezone() }}) > - (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }})) -{% endif %} -{%- endmacro -%} - -{% macro get_max_normalized_cursor(col_emitted_at, tablename) %} -{% if execute and is_incremental() %} - {% if env_var('INCREMENTAL_CURSOR', 'UNSET') == 'UNSET' %} - {% set query %} - select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }} - {% endset %} - {% set max_cursor = run_query(query).columns[0][0] %} - {% do return(max_cursor) %} - {% else %} - {% do return(env_var('INCREMENTAL_CURSOR')) %} - {% endif %} -{% endif %} -{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equal_rowcount.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equal_rowcount.sql deleted file mode 100644 index 0dd4dc62000e4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equal_rowcount.sql +++ /dev/null @@ -1,34 +0,0 @@ -{% macro oracle__test_equal_rowcount(model, compare_model) %} - -{#-- Needs to be set at parse time, before we return '' below --#} -{{ config(fail_calc = 'coalesce(diff_count, 0)') }} - -{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} -{%- if not execute -%} - {{ return('') }} -{% endif %} - -with a as ( - - select count(*) as count_a from {{ model }} - -), -b as ( - - select count(*) as count_b from {{ compare_model }} - -), -final as ( - - select - count_a, - count_b, - abs(count_a - count_b) as diff_count - from a - cross join b - -) - -select diff_count from final - -{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equality.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equality.sql deleted file mode 100644 index ef83a024f479f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equality.sql +++ /dev/null @@ -1,107 +0,0 @@ -{# --- Adapted from https://github.com/dbt-labs/dbt-utils/blob/0-19-0-updates/macros/schema_tests/equality.sql --- dbt-utils version: 0.6.4 --- This macro needs to be updated accordingly when dbt-utils is upgraded. --- This is needed because MySQL does not support the EXCEPT operator! 
-#} - -{% macro mysql__test_equality(model, compare_model, compare_columns=None) %} - - {%- if not execute -%} - {{ return('') }} - {% endif %} - - {%- do dbt_utils._is_relation(model, 'test_equality') -%} - - {%- if not compare_columns -%} - {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%} - {%- endif -%} - - {% set compare_cols_csv = compare_columns | join(', ') %} - - with a as ( - select * from {{ model }} - ), - - b as ( - select * from {{ compare_model }} - ), - - a_minus_b as ( - select {{ compare_cols_csv }} from a - where ({{ compare_cols_csv }}) not in - (select {{ compare_cols_csv }} from b) - ), - - b_minus_a as ( - select {{ compare_cols_csv }} from b - where ({{ compare_cols_csv }}) not in - (select {{ compare_cols_csv }} from a) - ), - - unioned as ( - select * from a_minus_b - union all - select * from b_minus_a - ), - - final as ( - select (select count(*) from unioned) + - (select abs( - (select count(*) from a_minus_b) - - (select count(*) from b_minus_a) - )) - as count - ) - - select count from final - -{% endmacro %} - -{% macro oracle__test_equality(model) %} - {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} - {%- if not execute -%} - {{ return('') }} - {% endif %} - - -- setup - {%- do dbt_utils._is_relation(model, 'test_equality') -%} - - {#- - If the compare_cols arg is provided, we can run this test without querying the - information schema — this allows the model to be an ephemeral model - -#} - {%- set compare_columns = kwargs.get('compare_columns', None) -%} - - {%- if not compare_columns -%} - {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%} - {%- endif -%} - - {% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %} - {% set compare_cols_csv = compare_columns | join(', ') %} - - with a as ( - select * from {{ model }} - ), - b as ( - select * from {{ compare_model }} - ), - a_minus_b as ( - select {{compare_cols_csv}} from a - {{ dbt_utils.except() }} - select {{compare_cols_csv}} from b - ), - b_minus_a as ( - select {{compare_cols_csv}} from b - {{ dbt_utils.except() }} - select {{compare_cols_csv}} from a - ), - unioned as ( - select * from a_minus_b - union all - select * from b_minus_a - ) - select count(*) from unioned -{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/should_full_refresh.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/should_full_refresh.sql deleted file mode 100644 index ff2c6d54ecce3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/should_full_refresh.sql +++ /dev/null @@ -1,51 +0,0 @@ -{# - This overrides the behavior of the macro `should_full_refresh` so full refreshes are triggered if: - - the dbt cli is run with --full-refresh flag or the model is configured explicitly to full_refresh - - the column _airbyte_ab_id does not exist in the normalized tables, to make sure it is well populated. 
-#} - -{%- macro need_full_refresh(col_ab_id, target_table=this) -%} - {%- if not execute -%} - {{ return(false) }} - {%- endif -%} - {%- set found_column = [] %} - {%- set cols = adapter.get_columns_in_relation(target_table) -%} - {%- for col in cols -%} - {%- if col.column == col_ab_id -%} - {% do found_column.append(col.column) %} - {%- endif -%} - {%- endfor -%} - {%- if found_column -%} - {{ return(false) }} - {%- else -%} - {{ dbt_utils.log_info(target_table ~ "." ~ col_ab_id ~ " does not exist yet. The table will be created or rebuilt with dbt.full_refresh") }} - {{ return(true) }} - {%- endif -%} -{%- endmacro -%} - -{%- macro should_full_refresh() -%} - {% set config_full_refresh = config.get('full_refresh') %} - {%- if config_full_refresh is none -%} - {% set config_full_refresh = flags.FULL_REFRESH %} - {%- endif -%} - {%- if not config_full_refresh -%} - {% set config_full_refresh = need_full_refresh(get_col_ab_id(), this) %} - {%- endif -%} - {% do return(config_full_refresh) %} -{%- endmacro -%} - -{%- macro get_col_ab_id() -%} - {{ adapter.dispatch('get_col_ab_id')() }} -{%- endmacro -%} - -{%- macro default__get_col_ab_id() -%} - _airbyte_ab_id -{%- endmacro -%} - -{%- macro oracle__get_col_ab_id() -%} - "_AIRBYTE_AB_ID" -{%- endmacro -%} - -{%- macro snowflake__get_col_ab_id() -%} - _AIRBYTE_AB_ID -{%- endmacro -%} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/star_intersect.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/star_intersect.sql deleted file mode 100644 index 3f3d06c4eb106..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/star_intersect.sql +++ /dev/null @@ -1,46 +0,0 @@ -{# - Similar to the star macro here: https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/star.sql - - This star_intersect macro takes an additional 'intersect' relation as argument. - Its behavior is to select columns from both 'intersect' and 'from' relations with the following rules: - - if a column exists in both the 'from' and 'intersect' relations, then the column from 'intersect' is used - - if a column exists only in the 'from' relation, then the column from 'from' is used -#} -{% macro star_intersect(from, intersect, from_alias=False, intersect_alias=False, except=[]) -%} - {%- do dbt_utils._is_relation(from, 'star_intersect') -%} - {%- do dbt_utils._is_ephemeral(from, 'star_intersect') -%} - {%- do dbt_utils._is_relation(intersect, 'star_intersect') -%} - {%- do dbt_utils._is_ephemeral(intersect, 'star_intersect') -%} - - {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. 
#} - {%- if not execute -%} - {{ return('') }} - {% endif %} - - {%- set include_cols = [] %} - {%- set cols = adapter.get_columns_in_relation(from) -%} - {%- set except = except | map("lower") | list %} - {%- for col in cols -%} - {%- if col.column|lower not in except -%} - {% do include_cols.append(col.column) %} - {%- endif %} - {%- endfor %} - - {%- set include_intersect_cols = [] %} - {%- set intersect_cols = adapter.get_columns_in_relation(intersect) -%} - {%- for col in intersect_cols -%} - {%- if col.column|lower not in except -%} - {% do include_intersect_cols.append(col.column) %} - {%- endif %} - {%- endfor %} - - {%- for col in include_cols %} - {%- if col in include_intersect_cols -%} - {%- if intersect_alias %}{{ intersect_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }} - {%- if not loop.last %},{{ '\n  ' }}{% endif %} - {%- else %} - {%- if from_alias %}{{ from_alias }}.{% else %}{{ from }}.{%- endif -%}{{ adapter.quote(col)|trim }} as {{ adapter.quote(col)|trim }} - {%- if not loop.last %},{{ '\n  ' }}{% endif %} - {%- endif %} - {%- endfor -%} -{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template/packages.yml deleted file mode 100755 index 33b4edd58c8c6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/packages.yml +++ /dev/null @@ -1,5 +0,0 @@ -# add dependencies. these will get pulled during the `dbt deps` process. - -packages: - - git: "https://github.com/fishtown-analytics/dbt-utils.git" - revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt.Dockerfile b/airbyte-integrations/bases/base-normalization/dbt.Dockerfile deleted file mode 100644 index 09b0e3c94064a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/dbt.Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -# This dockerfile only exists to pull and re-export this image converted to the local arch of this machine -# It is then consumed by the Dockerfile in this directory as "fishtownanalytics/dbt:1.0.0-dev" -FROM fishtownanalytics/dbt:1.0.0 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml deleted file mode 100644 index c9b9331f3e295..0000000000000 --- a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml +++ /dev/null @@ -1,66 +0,0 @@ -version: "3.7" - -services: - normalization: - image: airbyte/normalization:${VERSION} - build: - dockerfile: Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-mssql: - image: airbyte/normalization-mssql:${VERSION} - build: - dockerfile: mssql.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-mysql: - image: airbyte/normalization-mysql:${VERSION} - build: - dockerfile: mysql.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-oracle: - image: airbyte/normalization-oracle:${VERSION} - build: - dockerfile: oracle.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-clickhouse: - image: airbyte/normalization-clickhouse:${VERSION} - build: - dockerfile: clickhouse.Dockerfile - context: . 
- labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-snowflake: - image: airbyte/normalization-snowflake:${VERSION} - build: - dockerfile: snowflake.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-redshift: - image: airbyte/normalization-redshift:${VERSION} - build: - dockerfile: redshift.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-tidb: - image: airbyte/normalization-tidb:${VERSION} - build: - dockerfile: tidb.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} - normalization-duckdb: - image: airbyte/normalization-duckdb:${VERSION} - build: - dockerfile: duckdb.Dockerfile - context: . - labels: - io.airbyte.git-revision: ${GIT_REVISION} diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.yaml deleted file mode 100644 index 3b85f9bf0e9ec..0000000000000 --- a/airbyte-integrations/bases/base-normalization/docker-compose.yaml +++ /dev/null @@ -1,22 +0,0 @@ -version: "3.7" - -# this file only exists so that we can easily check that all of these images exist in docker hub in check_images_exist.sh -services: - normalization: - image: airbyte/normalization:${VERSION} - normalization-mssql: - image: airbyte/normalization-mssql:${VERSION} - normalization-mysql: - image: airbyte/normalization-mysql:${VERSION} - normalization-oracle: - image: airbyte/normalization-oracle:${VERSION} - normalization-clickhouse: - image: airbyte/normalization-clickhouse:${VERSION} - normalization-snowflake: - image: airbyte/normalization-snowflake:${VERSION} - normalization-redshift: - image: airbyte/normalization-redshift:${VERSION} - normalization-tidb: - image: airbyte/normalization-tidb:${VERSION} - normalization-duckdb: - image: airbyte/normalization-duckdb:${VERSION} diff --git a/airbyte-integrations/bases/base-normalization/duckdb.Dockerfile b/airbyte-integrations/bases/base-normalization/duckdb.Dockerfile deleted file mode 100644 index af039e7114ecd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/duckdb.Dockerfile +++ /dev/null @@ -1,40 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . 
-RUN pip install dbt-duckdb==1.0.1 - -#adding duckdb manually (outside of setup.py - lots of errors) -RUN pip install duckdb - -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-duckdb diff --git a/airbyte-integrations/bases/base-normalization/entrypoint.sh b/airbyte-integrations/bases/base-normalization/entrypoint.sh deleted file mode 100755 index a1df178483c27..0000000000000 --- a/airbyte-integrations/bases/base-normalization/entrypoint.sh +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env bash - -set -e # tells bash, in a script, to exit whenever anything returns a non-zero return value. - -function echo2() { - echo >&2 "$@" -} - -function error() { - echo2 "$@" - exit 1 -} - -function config_cleanup() { - # Remove config file as it might still contain sensitive credentials (for example, - # injected OAuth Parameters should not be visible to custom docker images running custom transformation operations) - rm -f "${CONFIG_FILE}" -} - -function check_dbt_event_buffer_size() { - ret=0 - dbt --help | grep -E -- '--event-buffer-size' && return - ret=1 -} - -PROJECT_DIR=$(pwd) - -# How many commits should be downloaded from git to view history of a branch -GIT_HISTORY_DEPTH=5 - -# This function produces a working DBT project folder at the $PROJECT_DIR path so that dbt commands can be run -# from it successfully with the proper credentials. This can be accomplished by providing different custom variables -# to tweak the final project structure. For example, we can either use a user-provided base folder (git repo) or -# use the standard/base template folder to generate normalization models from. -function configuredbt() { - # We first need to generate a workspace folder for a dbt project to run from: - if [[ -z "${GIT_REPO}" ]]; then - # No git repository provided, use the dbt-template folder (shipped inside normalization docker image) - # as the base folder for dbt workspace - cp -r /airbyte/normalization_code/dbt-template/* "${PROJECT_DIR}" - echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}" - set +e # allow script to continue running even if next commands fail to run properly - # Generate a profiles.yml file for the selected destination/integration type - transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}" - if [[ -n "${CATALOG_FILE}" ]]; then - # If catalog file is provided, generate normalization models, otherwise skip it - echo "Running: transform-catalog --integration-type ${INTEGRATION_TYPE} --profile-config-dir ${PROJECT_DIR} --catalog ${CATALOG_FILE} --out ${PROJECT_DIR}/models/generated/ --json-column _airbyte_data" - transform-catalog --integration-type "${INTEGRATION_TYPE}" --profile-config-dir "${PROJECT_DIR}" --catalog "${CATALOG_FILE}" --out "${PROJECT_DIR}/models/generated/" --json-column "_airbyte_data" - TRANSFORM_EXIT_CODE=$? - if [ ${TRANSFORM_EXIT_CODE} -ne 0 ]; then - echo -e "\nShowing destination_catalog.json to diagnose/debug errors (${TRANSFORM_EXIT_CODE}):\n" - cat "${CATALOG_FILE}" | jq - exit ${TRANSFORM_EXIT_CODE} - fi - fi - set -e # tells bash, in a script, to exit whenever anything returns a non-zero return value. 
- else - trap config_cleanup EXIT - # Use git repository as a base workspace folder for dbt projects - if [[ -d git_repo ]]; then - rm -rf git_repo - fi - # Make a shallow clone of the latest git repository in the workspace folder - if [[ -z "${GIT_BRANCH}" ]]; then - # No git branch specified, use the default branch of the git repository - echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} --single-branch \$GIT_REPO git_repo" - git clone --depth ${GIT_HISTORY_DEPTH} --single-branch "${GIT_REPO}" git_repo - else - # Checkout a particular branch from the git repository - echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} -b ${GIT_BRANCH} --single-branch \$GIT_REPO git_repo" - git clone --depth ${GIT_HISTORY_DEPTH} -b "${GIT_BRANCH}" --single-branch "${GIT_REPO}" git_repo - fi - # Print a few history logs to make it easier for users to verify the right code version has been checked out from git - echo "Last 5 commits in git_repo:" - (cd git_repo; git log --oneline -${GIT_HISTORY_DEPTH}; cd -) - # Generate a profiles.yml file for the selected destination/integration type - echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}" - transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}" - config_cleanup - fi -} - -## todo: make it easy to select source or destination and validate based on selection by adding an integration type env variable. -function main() { - CMD="$1" - shift 1 || error "command not specified." - - while [ $# -ne 0 ]; do - case "$1" in - --config) - CONFIG_FILE="$2" - shift 2 - ;; - --catalog) - CATALOG_FILE="$2" - shift 2 - ;; - --integration-type) - INTEGRATION_TYPE="$2" - shift 2 - ;; - --git-repo) - GIT_REPO="$2" - shift 2 - ;; - --git-branch) - GIT_BRANCH="$2" - shift 2 - ;; - *) - error "Unknown option: $1" - ;; - esac - done - - case "$CMD" in - run) - configuredbt - . /airbyte/sshtunneling.sh - openssh "${PROJECT_DIR}/ssh.json" - trap 'closessh' EXIT - - set +e # allow script to continue running even if next commands fail to run properly - # We don't run dbt 1.0.x on all destinations (because their plugins don't support it yet) - # So we need to only pass `--event-buffer-size` if it's supported by DBT. - # Same goes for JSON formatted logging. - check_dbt_event_buffer_size - if [ "$ret" -eq 0 ]; then - echo -e "\nDBT >=1.0.0 detected; using 10K event buffer size\n" - dbt_additional_args="--event-buffer-size=10000 --log-format json" - else - dbt_additional_args="" - fi - - # Run dbt to compile and execute the generated normalization models - dbt ${dbt_additional_args} run --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}" - DBT_EXIT_CODE=$? - if [ ${DBT_EXIT_CODE} -ne 0 ]; then - echo -e "\nRunning dbt debug to check if the destination is available for dbt and well configured (${DBT_EXIT_CODE}):\n" - dbt debug --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}" - DBT_DEBUG_EXIT_CODE=$? - if [ ${DBT_DEBUG_EXIT_CODE} -eq 0 ]; then - # dbt debug is successful, so the error must be somewhere else... 
- echo -e "\nForward dbt output logs to diagnose/debug errors (${DBT_DEBUG_EXIT_CODE}):\n" - cat "${PROJECT_DIR}/../logs/dbt.log" - fi - fi - closessh - exit ${DBT_EXIT_CODE} - ;; - configure-dbt) - configuredbt - ;; - *) - error "Unknown command: $CMD" - ;; - esac -} - -main "$@" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/__init__.py b/airbyte-integrations/bases/base-normalization/integration_tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py deleted file mode 100644 index b70b9248eac19..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ /dev/null @@ -1,740 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import json -import os -import pathlib -import random -import re -import socket -import string -import subprocess -import sys -import threading -import time -from copy import copy -from typing import Any, Callable, Dict, List, Union - -import yaml -from normalization.destination_type import DestinationType -from normalization.transform_catalog.transform import read_yaml_config, write_yaml_config -from normalization.transform_config.transform import TransformConfig - -NORMALIZATION_TEST_TARGET = "NORMALIZATION_TEST_TARGET" -NORMALIZATION_TEST_MSSQL_DB_PORT = "NORMALIZATION_TEST_MSSQL_DB_PORT" -NORMALIZATION_TEST_MYSQL_DB_PORT = "NORMALIZATION_TEST_MYSQL_DB_PORT" -NORMALIZATION_TEST_POSTGRES_DB_PORT = "NORMALIZATION_TEST_POSTGRES_DB_PORT" -NORMALIZATION_TEST_CLICKHOUSE_DB_PORT = "NORMALIZATION_TEST_CLICKHOUSE_DB_PORT" -NORMALIZATION_TEST_TIDB_DB_PORT = "NORMALIZATION_TEST_TIDB_DB_PORT" -NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH = "NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH" - - -class DbtIntegrationTest(object): - def __init__(self): - self.target_schema = "test_normalization" - self.container_prefix = f"test_normalization_db_{self.random_string(3)}" - self.db_names = [] - - @staticmethod - def generate_random_string(prefix: str) -> str: - return prefix + DbtIntegrationTest.random_string(5) - - @staticmethod - def random_string(length: int) -> str: - return "".join(random.choice(string.ascii_lowercase) for i in range(length)) - - def set_target_schema(self, target_schema: str): - self.target_schema = target_schema - - def setup_db(self, destinations_to_test: List[str]): - if DestinationType.POSTGRES.value in destinations_to_test: - self.setup_postgres_db() - if DestinationType.MYSQL.value in destinations_to_test: - self.setup_mysql_db() - if DestinationType.MSSQL.value in destinations_to_test: - self.setup_mssql_db() - if DestinationType.CLICKHOUSE.value in destinations_to_test: - self.setup_clickhouse_db() - if DestinationType.TIDB.value in destinations_to_test: - self.setup_tidb_db() - - def setup_postgres_db(self): - start_db = True - if os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT): - port = int(os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT)) - start_db = False - else: - port = self.find_free_port() - config = { - "host": "localhost", - "username": "integration-tests", - "password": "integration-tests", - "port": port, - "database": "postgres", - "schema": self.target_schema, - } - if start_db: - self.db_names.append("postgres") - print("Starting localhost postgres container for tests") - commands = [ - "docker", - "run", - "--rm", - "--name", - 
f"{self.container_prefix}_postgres", - "-e", - f"POSTGRES_USER={config['username']}", - "-e", - f"POSTGRES_PASSWORD={config['password']}", - "-p", - f"{config['port']}:5432", - "-d", - "marcosmarxm/postgres-ssl:dev", - "-c", - "ssl=on", - "-c", - "ssl_cert_file=/var/lib/postgresql/server.crt", - "-c", - "ssl_key_file=/var/lib/postgresql/server.key", - ] - print("Executing: ", " ".join(commands)) - subprocess.call(commands) - print("....Waiting for Postgres DB to start...15 sec") - time.sleep(15) - if not os.path.exists("../secrets"): - os.makedirs("../secrets") - with open("../secrets/postgres.json", "w") as fh: - fh.write(json.dumps(config)) - - def setup_mysql_db(self): - start_db = True - if os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT): - port = int(os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT)) - start_db = False - else: - port = self.find_free_port() - config = { - "host": "localhost", - "port": port, - "database": self.target_schema, - "username": "root", - "password": "", - } - if start_db: - self.db_names.append("mysql") - print("Starting localhost mysql container for tests") - commands = [ - "docker", - "run", - "--rm", - "--name", - f"{self.container_prefix}_mysql", - "-e", - "MYSQL_ALLOW_EMPTY_PASSWORD=yes", - "-e", - "MYSQL_INITDB_SKIP_TZINFO=yes", - "-e", - f"MYSQL_DATABASE={config['database']}", - "-e", - "MYSQL_ROOT_HOST=%", - "-p", - f"{config['port']}:3306", - "-d", - "mysql/mysql-server", - ] - print("Executing: ", " ".join(commands)) - subprocess.call(commands) - print("....Waiting for MySQL DB to start...15 sec") - time.sleep(15) - if not os.path.exists("../secrets"): - os.makedirs("../secrets") - with open("../secrets/mysql.json", "w") as fh: - fh.write(json.dumps(config)) - - def setup_mssql_db(self): - start_db = True - if os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT): - port = int(os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT)) - start_db = False - else: - port = self.find_free_port() - config = { - "host": "localhost", - "username": "SA", - "password": "MyStr0ngP@ssw0rd", - "port": port, - "database": self.target_schema, - "schema": self.target_schema, - } - if start_db: - self.db_names.append("mssql") - print("Starting localhost MS SQL Server container for tests") - command_start_container = [ - "docker", - "run", - "--rm", - "--name", - f"{self.container_prefix}_mssql", - "-h", - f"{self.container_prefix}_mssql", - "-e", - "ACCEPT_EULA='Y'", - "-e", - f"SA_PASSWORD='{config['password']}'", - "-e", - "MSSQL_PID='Standard'", - "-p", - f"{config['port']}:1433", - "-d", - "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04", - ] - # cmds & parameters - cmd_start_container = " ".join(command_start_container) - wait_sec = 30 - # run the docker container - print("Executing: ", cmd_start_container) - subprocess.check_call(cmd_start_container, shell=True) - # wait for service is available - print(f"....Waiting for MS SQL Server to start...{wait_sec} sec") - time.sleep(wait_sec) - # Run additional commands to prepare the table - command_create_db = [ - "docker", - "exec", - f"{self.container_prefix}_mssql", - "/opt/mssql-tools/bin/sqlcmd", - "-S", - config["host"], - "-U", - config["username"], - "-P", - config["password"], - "-Q", - f"CREATE DATABASE [{config['database']}]", - ] - # create test db - print("Executing: ", " ".join(command_create_db)) - subprocess.call(command_create_db) - if not os.path.exists("../secrets"): - os.makedirs("../secrets") - with open("../secrets/mssql.json", "w") as fh: - fh.write(json.dumps(config)) - - def setup_clickhouse_db(self): - """ - 
- The official ClickHouse JDBC driver uses HTTP port 8123.
-
- Ref: https://altinity.com/blog/2019/3/15/clickhouse-networking-part-1
- """
- start_db = True
- port = 8123
- if os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT):
- port = int(os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT))
- start_db = False
- if start_db:
- port = self.find_free_port()
- config = {
- "host": "localhost",
- "port": port,
- "database": self.target_schema,
- "username": "default",
- "password": "",
- "ssl": False,
- }
- if start_db:
- self.db_names.append("clickhouse")
- print("Starting localhost clickhouse container for tests")
- commands = [
- "docker",
- "run",
- "--rm",
- "--name",
- f"{self.container_prefix}_clickhouse",
- "--ulimit",
- "nofile=262144:262144",
- "-p",
- f"{config['port']}:8123", # the ClickHouse JDBC driver uses the HTTP port
- "-d",
- # so far, only the latest ClickHouse server image has window functions enabled
- "clickhouse/clickhouse-server:latest",
- ]
- print("Executing: ", " ".join(commands))
- subprocess.call(commands)
- print("....Waiting for ClickHouse DB to start...15 sec")
- time.sleep(15)
- # Run an additional command to create the test database
- command_create_db = [
- "docker",
- "run",
- "--rm",
- "--link",
- f"{self.container_prefix}_clickhouse:clickhouse-server",
- "clickhouse/clickhouse-client:21.8.10.19",
- "--host",
- "clickhouse-server",
- "--query",
- f"CREATE DATABASE IF NOT EXISTS {config['database']}",
- ]
- # create test db
- print("Executing: ", " ".join(command_create_db))
- subprocess.call(command_create_db)
- if not os.path.exists("../secrets"):
- os.makedirs("../secrets")
- with open("../secrets/clickhouse.json", "w") as fh:
- fh.write(json.dumps(config))
-
- def setup_tidb_db(self):
- start_db = True
- if os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT):
- port = int(os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT))
- start_db = False
- else:
- port = self.find_free_port()
- config = {
- "host": "127.0.0.1",
- "port": port,
- "database": self.target_schema,
- "schema": self.target_schema,
- "username": "root",
- "password": "",
- "ssl": False,
- }
- if start_db:
- self.db_names.append("tidb")
- print("Starting tidb container for tests")
- commands = [
- "docker",
- "run",
- "--rm",
- "--name",
- f"{self.container_prefix}_tidb",
- "-p",
- f"{config['port']}:4000",
- "-d",
- "pingcap/tidb:v5.4.0",
- ]
- print("Executing: ", " ".join(commands))
- subprocess.call(commands)
- print("....Waiting for TiDB to start...15 sec")
- time.sleep(15)
- command_create_db = [
- "docker",
- "run",
- "--rm",
- "--link",
- f"{self.container_prefix}_tidb:tidb",
- "arey/mysql-client",
- "--host=tidb",
- "--user=root",
- "--port=4000",
- f"--execute=CREATE DATABASE IF NOT EXISTS {self.target_schema}",
- ]
- print("Executing: ", " ".join(command_create_db))
- subprocess.call(command_create_db)
- if not os.path.exists("../secrets"):
- os.makedirs("../secrets")
- with open("../secrets/tidb.json", "w") as fh:
- fh.write(json.dumps(config))
-
- @staticmethod
- def find_free_port():
- """
- Find an unused localhost port on which to start a test database
- """
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
- s.bind(("", 0))
- addr = s.getsockname()
- s.close()
- return addr[1]
-
- def tear_down_db(self):
- for db_name in self.db_names:
- print(f"Stopping localhost {db_name} container for tests")
- try:
- subprocess.call(["docker", "kill", f"{self.container_prefix}_{db_name}"])
- except Exception as e:
- print(f"WARN: Exception while shutting down {db_name}: {e}")
-
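# A minimal, self-contained sketch of the find_free_port pattern above, assuming
# only the Python standard library (the name pick_free_port is illustrative):
# binding to port 0 asks the kernel for any unused TCP port, which is read back
# before the socket is released. The port is only reserved while the socket is
# open, so callers should start their container on it promptly afterwards.
import socket

def pick_free_port() -> int:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("", 0))  # port 0 => the kernel assigns an unused port
        return s.getsockname()[1]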
- @staticmethod
- def change_current_test_dir(request):
- # This makes the test run whether it is executed from the tests folder (with pytest/gradle)
- # or from the base-normalization folder (through PyCharm)
- integration_tests_dir = os.path.join(request.fspath.dirname, "integration_tests")
- if os.path.exists(integration_tests_dir):
- os.chdir(integration_tests_dir)
- else:
- os.chdir(request.fspath.dirname)
-
- def generate_profile_yaml_file(
- self, destination_type: DestinationType, test_root_dir: str, random_schema: bool = False
- ) -> Dict[str, Any]:
- """
- Each destination requires different connection settings. This step generates an appropriate profiles.yml,
- as described here: https://docs.getdbt.com/reference/profiles.yml
- """
- config_generator = TransformConfig()
- profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
- # Adapt the credentials file to look like a destination config.json
- if destination_type.value == DestinationType.BIGQUERY.value:
- credentials = profiles_config["basic_bigquery_config"]
- profiles_config = {
- "credentials_json": json.dumps(credentials),
- "dataset_id": self.target_schema,
- "project_id": credentials["project_id"],
- "dataset_location": "US",
- }
- elif destination_type.value == DestinationType.MYSQL.value:
- profiles_config["database"] = self.target_schema
- elif destination_type.value == DestinationType.REDSHIFT.value:
- profiles_config["schema"] = self.target_schema
- if random_schema:
- profiles_config["schema"] = self.target_schema + "_" + "".join(random.choices(string.ascii_lowercase, k=5))
- else:
- profiles_config["schema"] = self.target_schema
- if destination_type.value == DestinationType.CLICKHOUSE.value:
- clickhouse_config = copy(profiles_config)
- profiles_yaml = config_generator.transform(destination_type, clickhouse_config)
- else:
- profiles_yaml = config_generator.transform(destination_type, profiles_config)
- config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml")
- return profiles_config
-
- @staticmethod
- def run_destination_process(message_file: str, test_root_dir: str, commands: List[str]):
- print("Executing: ", " ".join(commands))
- with open(os.path.join(test_root_dir, "destination_output.log"), "ab") as f:
- process = subprocess.Popen(commands, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-
- def writer():
- if os.path.exists(message_file):
- with open(message_file, "rb") as input_data:
- while True:
- line = input_data.readline()
- if not line:
- break
- if not line.startswith(b"//"):
- process.stdin.write(line)
- process.stdin.close()
-
- thread = threading.Thread(target=writer)
- thread.start()
- for line in iter(process.stdout.readline, b""):
- f.write(line)
- sys.stdout.write(line.decode("utf-8"))
- thread.join()
- process.wait()
- return process.returncode == 0
-
- @staticmethod
- def get_normalization_image(destination_type: DestinationType) -> str:
- if DestinationType.MSSQL.value == destination_type.value:
- return "airbyte/normalization-mssql:dev"
- elif DestinationType.MYSQL.value == destination_type.value:
- return "airbyte/normalization-mysql:dev"
- elif DestinationType.ORACLE.value == destination_type.value:
- return "airbyte/normalization-oracle:dev"
- elif DestinationType.CLICKHOUSE.value == destination_type.value:
- return "airbyte/normalization-clickhouse:dev"
- elif DestinationType.SNOWFLAKE.value == destination_type.value:
- return "airbyte/normalization-snowflake:dev"
- elif DestinationType.REDSHIFT.value == destination_type.value:
- return "airbyte/normalization-redshift:dev"
- elif DestinationType.TIDB.value == destination_type.value:
- return "airbyte/normalization-tidb:dev"
- else:
- return "airbyte/normalization:dev"
-
- def dbt_check(self, destination_type: DestinationType, test_root_dir: str):
- """
- Run the dbt CLI to perform transformations on the test raw data in the destination
- """
- normalization_image: str = self.get_normalization_image(destination_type)
- # Perform sanity check on dbt project settings
- assert self.run_check_dbt_command(normalization_image, "debug", test_root_dir)
- assert self.run_check_dbt_command(normalization_image, "deps", test_root_dir)
-
- def dbt_run(self, destination_type: DestinationType, test_root_dir: str, force_full_refresh: bool = False):
- """
- Run the dbt CLI to perform transformations on the test raw data in the destination
- """
- normalization_image: str = self.get_normalization_image(destination_type)
- # Compile dbt models files into destination sql dialect, then run the transformation queries
- assert self.run_check_dbt_command(normalization_image, "run", test_root_dir, force_full_refresh)
-
- def dbt_run_macro(self, destination_type: DestinationType, test_root_dir: str, macro: str, macro_args: str = None):
- """
- Run the dbt CLI to perform transformations on the test raw data in the destination, using an independent macro.
- """
- normalization_image: str = self.get_normalization_image(destination_type)
- # Compile dbt models files into destination sql dialect, then run the transformation queries
- assert self.run_dbt_run_operation(normalization_image, test_root_dir, macro, macro_args)
-
- def run_check_dbt_command(self, normalization_image: str, command: str, cwd: str, force_full_refresh: bool = False) -> bool:
- """
- Run a dbt subprocess while checking its output for, and counting, "ERROR", "FAIL" or "WARNING" mentions
- """
- if any([normalization_image.startswith(x) for x in ["airbyte/normalization-oracle", "airbyte/normalization-clickhouse"]]):
- dbtAdditionalArgs = []
- else:
- dbtAdditionalArgs = ["--event-buffer-size=10000"]
-
- commands = (
- [
- "docker",
- "run",
- "--rm",
- "--init",
- "-v",
- f"{cwd}:/workspace",
- "-v",
- f"{cwd}/build:/build",
- "-v",
- f"{cwd}/logs:/logs",
- "-v",
- f"{cwd}/build/dbt_packages:/dbt",
- "--network",
- "host",
- "--entrypoint",
- "/usr/local/bin/dbt",
- "-i",
- normalization_image,
- ]
- + dbtAdditionalArgs
- + [
- command,
- "--profiles-dir=/workspace",
- "--project-dir=/workspace",
- ]
- )
- if force_full_refresh:
- commands.append("--full-refresh")
- command = f"{command} --full-refresh"
- print("Executing: ", " ".join(commands))
- print(f"Equivalent to: dbt {command} --profiles-dir={cwd} --project-dir={cwd}")
- return self.run_check_dbt_subprocess(commands, cwd)
-
- def run_dbt_run_operation(self, normalization_image: str, cwd: str, macro: str, macro_args: str = None) -> bool:
- """
- Run a dbt subprocess while checking its output for, and counting, "ERROR", "FAIL" or "WARNING" mentions
- """
- args = ["--args", macro_args] if macro_args else []
- commands = (
- [
- "docker",
- "run",
- "--rm",
- "--init",
- "-v",
- f"{cwd}:/workspace",
- "-v",
- f"{cwd}/build:/build",
- "-v",
- f"{cwd}/logs:/logs",
- "-v",
- f"{cwd}/build/dbt_packages:/dbt",
- "--network",
- "host",
- "--entrypoint",
- "/usr/local/bin/dbt",
- "-i",
- normalization_image,
- ]
- + ["run-operation", macro]
- + args
- + ["--profiles-dir=/workspace", "--project-dir=/workspace"]
- )
-
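# For reference, the container invocation assembled above boils down to this dbt
# CLI call (a sketch; /workspace is the mounted test directory, and <macro> and
# <json> are placeholders):
#
#   dbt run-operation <macro> --args '<json>' --profiles-dir=/workspace --project-dir=/workspace
#
# Overriding the image entrypoint with /usr/local/bin/dbt runs a single dbt
# sub-command directly instead of the full normalization flow, and
# --network host lets dbt reach the throwaway test databases listening on
# localhost ports.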
print("Executing: ", " ".join(commands)) - print(f"Equivalent to: dbt run-operation {macro} --args {macro_args} --profiles-dir={cwd} --project-dir={cwd}") - return self.run_check_dbt_subprocess(commands, cwd) - - def run_check_dbt_subprocess(self, commands: list, cwd: str): - error_count = 0 - with open(os.path.join(cwd, "dbt_output.log"), "ab") as f: - process = subprocess.Popen(commands, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=os.environ) - for line in iter(lambda: process.stdout.readline(), b""): - f.write(line) - str_line = line.decode("utf-8") - sys.stdout.write(str_line) - # keywords to match lines as signaling errors - if "ERROR" in str_line or "FAIL" in str_line or "WARNING" in str_line: - # exception keywords in lines to ignore as errors (such as summary or expected warnings) - is_exception = False - for except_clause in [ - "Done.", # DBT Summary - "PASS=", # DBT Summary - "Nothing to do.", # When no schema/data tests are setup - "Configuration paths exist in your dbt_project.yml", # When no cte / view are generated - "Error loading config file: .dockercfg: $HOME is not defined", # ignore warning - "depends on a node named 'disabled_test' which was not found", # Tests throwing warning because it is disabled - "The requested image's platform (linux/amd64) does not match the detected host platform " - + "(linux/arm64/v8) and no specific platform was requested", # temporary patch until we publish images for arm64 - ]: - if except_clause in str_line: - is_exception = True - break - if not is_exception: - # count lines signaling an error/failure/warning - error_count += 1 - process.wait() - message = ( - f"{' '.join(commands)}\n\tterminated with return code {process.returncode} " - f"with {error_count} 'Error/Warning/Fail' mention(s)." - ) - print(message) - assert error_count == 0, message - assert process.returncode == 0, message - if error_count > 0: - return False - return process.returncode == 0 - - @staticmethod - def copy_replace(src, dst, pattern=None, replace_value=None): - """ - Copies a file from src to dst replacing pattern by replace_value - Parameters - ---------- - src : string - Path to the source filename to copy from - dst : string - Path to the output filename to copy to - pattern - list of Patterns to replace inside the src file - replace_value - list of Values to replace by in the dst file - """ - file1 = open(src, "r") if isinstance(src, str) else src - file2 = open(dst, "w") if isinstance(dst, str) else dst - pattern = [pattern] if isinstance(pattern, str) else pattern - replace_value = [replace_value] if isinstance(replace_value, str) else replace_value - if replace_value and pattern: - if len(replace_value) != len(pattern): - raise Exception("Invalid parameters: pattern and replace_value" " have different sizes.") - rules = [(re.compile(regex, re.IGNORECASE), value) for regex, value in zip(pattern, replace_value)] - else: - rules = [] - for line in file1: - if rules: - for rule in rules: - line = re.sub(rule[0], rule[1], line) - file2.write(line) - if isinstance(src, str): - file1.close() - if isinstance(dst, str): - file2.close() - - @staticmethod - def get_test_targets() -> List[str]: - """ - Returns a list of destinations to run tests on. 
-
- if the environment variable NORMALIZATION_TEST_TARGET is set to a comma-separated list of destination names,
- then the tests are run only on that subset of destinations.
- Otherwise, tests are run against all destinations.
- """
- if os.getenv(NORMALIZATION_TEST_TARGET):
- target_str = os.getenv(NORMALIZATION_TEST_TARGET)
- return [d.value for d in {DestinationType.from_string(s.strip()) for s in target_str.split(",")}]
- else:
- return [d.value for d in DestinationType]
-
- @staticmethod
- def update_yaml_file(filename: str, callback: Callable):
- config = read_yaml_config(filename)
- updated, config = callback(config)
- if updated:
- write_yaml_config(config, filename)
-
- def clean_tmp_tables(
- self,
- destination_type: Union[DestinationType, List[DestinationType]],
- test_type: str,
- tmp_folders: list = None,
- git_versioned_tests: list = None,
- ):
- """
- Cleans up all temporary schemas created during the test session.
- It parses the provided tmp_folders: List[str] or uses `git_versioned_tests` to find sources.yml files generated for the tests.
- It gets the target schemas created by the tests and removes them using the custom scenario specified in
- the `dbt-project-template/macros/clean_tmp_tables.sql` macro.
-
- REQUIREMENTS:
- 1) Ideally, the schemas should have unique names like: test_normalization_ to avoid conflicts.
- 2) `clean_tmp_tables.sql` should define a specific macro for the target destination for the cleanup to proceed.
-
- INPUT ARGUMENTS:
- :: destination_type : either a single destination or a list of destinations
- :: test_type: either "ephemeral" or "normalization" should be supplied.
- :: tmp_folders: should be supplied if test_type = "ephemeral", to get schemas from /build/normalization_test_output folders
- :: git_versioned_tests: should be supplied if test_type = "normalization", to get schemas from integration_tests/normalization_test_output folders
-
- EXAMPLE:
- clean_up_args = {
- "destination_type": [ DestinationType.REDSHIFT, DestinationType.POSTGRES, ... ]
- "test_type": "normalization",
- "git_versioned_tests": git_versioned_tests,
- }
- """
-
- path_to_sources: str = "/models/generated/sources.yml"
- test_folders: dict = {}
- source_files: dict = {}
- schemas_to_remove: dict = {}
-
- # collect information about the tmp_tables created by the tests for each destination
- for destination in destination_type:
- test_folders[destination.value] = []
- source_files[destination.value] = []
- schemas_to_remove[destination.value] = []
-
- # based on test_type, select the path to the source files
- if test_type == "ephemeral" or test_type == "test_reset_scd_overwrite":
- if not tmp_folders:
- raise TypeError("`tmp_folders` arg is not provided.")
- for folder in tmp_folders:
- if destination.value in folder:
- test_folders[destination.value].append(folder)
- source_files[destination.value].append(f"{folder}{path_to_sources}")
- elif test_type == "normalization":
- if not git_versioned_tests:
- raise TypeError("`git_versioned_tests` arg is not provided.")
- base_path = f"{pathlib.Path().absolute()}/integration_tests/normalization_test_output"
- for test in git_versioned_tests:
- test_root_dir: str = f"{base_path}/{destination.value}/{test}"
- test_folders[destination.value].append(test_root_dir)
- source_files[destination.value].append(f"{test_root_dir}{path_to_sources}")
- else:
- raise TypeError(f"\n`test_type`: {test_type} is not registered; use `ephemeral` or `normalization` instead.\n")
-
- # parse sources.yml files from the test folders to get the schemas and table names created for the tests
- for file in source_files[destination.value]:
- source_yml = {}
- try:
- with open(file, "r") as source_file:
- source_yml = yaml.safe_load(source_file)
- except FileNotFoundError:
- print(f"\n{destination.value}: {file} doesn't exist; consider removing any temp tables and schemas manually!\n")
- pass
- test_sources: list = source_yml.get("sources", []) if source_yml else []
-
- for source in test_sources:
- target_schema: str = source.get("name")
- if target_schema not in schemas_to_remove[destination.value]:
- schemas_to_remove[destination.value].append(target_schema)
- # adding _airbyte_* tmp schemas to be removed
- schemas_to_remove[destination.value].append(f"_airbyte_{target_schema}")
-
- # cleaning up tmp_tables generated by the tests
- for destination in destination_type:
- if not schemas_to_remove[destination.value]:
- print(f"\n\t{destination.value.upper()} DESTINATION: SKIP CLEANING, NOTHING TO REMOVE.\n")
- else:
- print(f"\n\t{destination.value.upper()} DESTINATION: CLEANING LEFTOVERS...\n")
- print(f"\t{schemas_to_remove[destination.value]}\n")
- test_root_folder = test_folders[destination.value][0]
- args = json.dumps({"schemas": schemas_to_remove[destination.value]})
- self.dbt_check(destination, test_root_folder)
- self.dbt_run_macro(destination, test_root_folder, "clean_tmp_tables", args)
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml
deleted file mode 100755
index 474ab801dbf43..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml
+++ /dev/null
@@ -1,125 +0,0 @@
-name: airbyte_utils
-version: '1.0'
-config-version: 2
-profile: normalize
-model-paths:
-- models
-docs-paths:
-- docs
-analysis-paths:
-- analysis
-test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty - simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_ab3: 
test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array - unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias: test_normalization._airbyte_raw_unnest_alias - arrays_ab1: test_normalization._airbyte_raw_arrays - arrays_ab2: test_normalization._airbyte_raw_arrays - arrays_ab3: test_normalization._airbyte_raw_arrays - arrays: test_normalization._airbyte_raw_arrays - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children: test_normalization._airbyte_raw_unnest_alias - arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent: test_normalization._airbyte_raw_arrays - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - 
nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql deleted file mode 100644 index 59cf6d3a78044..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ /dev/null @@ -1,90 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` - partition by range_bucket( - _airbyte_active_row, - generate_array(0, 1, 1) - ) - cluster by _airbyte_unique_key_scd, _airbyte_emitted_at - OPTIONS() - as ( - --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') -with - -input_data as ( - select * - from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg` - -- 
nested_stream_with_complex_columns_resulting_into_long_names from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), '')) as - string -))) as _airbyte_unique_key, - id, - date, - `partition`, - date as _airbyte_start_at, - lag(date) over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as - string -), ''), '-', coalesce(cast(_airbyte_start_at as - string -), ''), '-', coalesce(cast(_airbyte_emitted_at as - string -), '')) as - string -))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - date, - `partition`, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index 39484347df2bd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,26 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_unique_key, _airbyte_emitted_at - OPTIONS() - as ( - --- Final base SQL model --- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` -select - _airbyte_unique_key, - id, - date, - `partition`, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from 
`dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` --- nested_stream_with_complex_columns_resulting_into_long_names from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index bfd09f00f73a2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,74 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_emitted_at - OPTIONS() - as ( - -with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - json_extract_array(`partition`, "$['double_array_data']") as double_array_data, - json_extract_array(`partition`, "$['DATA']") as DATA, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and `partition` is not null - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - double_array_data, - DATA, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as - 
string -), ''), '-', coalesce(cast(array_to_string(double_array_data, "|", "") as - string -), ''), '-', coalesce(cast(array_to_string(DATA, "|", "") as - string -), '')) as - string -))) as _airbyte_partition_hashid, - tmp.* -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - double_array_data, - DATA, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_partition_hashid -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql deleted file mode 100644 index e1d9a01e02e8e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql +++ /dev/null @@ -1,73 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_emitted_at - OPTIONS() - as ( - -with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` - -select - _airbyte_partition_hashid, - json_extract_scalar(DATA, "$['currency']") as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -cross join unnest(DATA) as DATA -where 1 = 1 -and DATA is not null - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 -select - _airbyte_partition_hashid, - cast(currency as - string -) as currency, - _airbyte_ab_id, - 
_airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as - string -), ''), '-', coalesce(cast(currency as - string -), '')) as - string -))) as _airbyte_DATA_hashid, - tmp.* -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 tmp --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_DATA_hashid -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index f537df341d3a3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ /dev/null @@ -1,73 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_emitted_at - OPTIONS() - as ( - -with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` - -select - _airbyte_partition_hashid, - json_extract_scalar(double_array_data, "$['id']") as id, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias --- double_array_data at 
nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -cross join unnest(double_array_data) as double_array_data -where 1 = 1 -and double_array_data is not null - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 -select - _airbyte_partition_hashid, - cast(id as - string -) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as - string -), ''), '-', coalesce(cast(id as - string -), '')) as - string -))) as _airbyte_double_array_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql deleted file mode 100644 index b988a169ef1f2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and 
extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias --- nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql deleted file mode 100644 index 3c6ed6e761a2b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} -select - cast(id as {{ dbt_utils.type_string() }}) as id, - cast(date as {{ dbt_utils.type_string() }}) as date, - cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} --- nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql deleted file mode 100644 index 3ada03a427fe1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", 
- partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'DATA') }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value('DATA'), ['currency'], ['currency']) }} as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -{{ cross_join_unnest('partition', 'DATA') }} -where 1 = 1 -and DATA is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql deleted file mode 100644 index 0734951e51265..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, - {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as DATA, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql deleted file mode 100644 index 912073c317273..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'double_array_data') }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -{{ cross_join_unnest('partition', 'double_array_data') }} -where 1 = 1 -and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql deleted file mode 100644 index 1df163184ca05..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ /dev/null @@ -1,164 +0,0 @@ -{{ config( - cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"], - partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_complex_columns_resulting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where 
_airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, - date as _airbyte_start_at, - lag(date) over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - date, - {{ adapter.quote('partition') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index c0bd55eeb61d0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"], - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} -select - _airbyte_unique_key, - id, - date, - {{ 
adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} --- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index f8cd174b2a5b7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - double_array_data, - DATA, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_partition_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql deleted file mode 100644 index 861e33d4859a1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ 
ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }} -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_DATA_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }} --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index c6b980124a5a6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/sources.yml deleted file mode 100644 index 29bae1b4b5105..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/sources.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_arrays - - name: _airbyte_raw_conflict_stream_array - - name: _airbyte_raw_conflict_stream_name - - name: _airbyte_raw_conflict_stream_scalar - - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - - name: _airbyte_raw_some_stream_that_was_empty - - name: _airbyte_raw_unnest_alias -- name: 
test_normalization_namespace - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql deleted file mode 100644 index e2187e231d380..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd - - - - when matched then update set - `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` - - - when not matched then insert - (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) - values - (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index c2f7397d2c3bd..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key - - - - when matched then update set - `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` - - - when not matched then insert - (`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) - values - (`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index 2a9c82fbe4001..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on FALSE - - - - when not matched then insert - (`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`) - values - (`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`) - - - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql deleted file mode 100644 index da77d8e6172f0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on FALSE - - - - when not matched then insert - (`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`) - values - (`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index a1198af2586c1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on FALSE - - - - when not matched then insert - (`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`) - values - (`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml deleted file 
mode 100755 index 013a446b320a5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- modified_models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate - exchange_rate: test_normalization._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml deleted file mode 100644 index 12745c37a1508..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: 
false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate - exchange_rate: test_normalization._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx - 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number - multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization._airbyte_raw_types_testing - types_testing_ab2: test_normalization._airbyte_raw_types_testing - types_testing_stg: 
test_normalization._airbyte_raw_types_testing - types_testing_scd: test_normalization._airbyte_raw_types_testing - types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index d7fd59df15b5d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,108 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` - partition by range_bucket( - _airbyte_active_row, - generate_array(0, 1, 1) - ) - cluster by _airbyte_unique_key_scd, _airbyte_emitted_at - OPTIONS() - as ( - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - -- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(NZD as - string -), '')) as - string -))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - date as _airbyte_start_at, - lag(date) over ( - partition by id, currency, cast(NZD as - string -) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(NZD as - string -) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as - string -), ''), '-', coalesce(cast(_airbyte_start_at as - string -), ''), '-', coalesce(cast(_airbyte_emitted_at as - string -), '')) as - string -))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index d862d7ae1f082..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,31 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_unique_key, _airbyte_emitted_at - OPTIONS() - as ( - --- Final base SQL model --- depends_on: `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` -select - _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` --- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 3d32bbb2838a9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,145 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_emitted_at - OPTIONS() - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['currency']") as currency, - json_extract_scalar(_airbyte_data, "$['date']") as date, - json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, - json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, - json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, - json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, - json_extract_scalar(_airbyte_data, "$['USD']") as USD, - json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes, - json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz, - json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz, - 
json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz, - json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - int64 -) as id, - cast(currency as - string -) as currency, - cast(nullif(date, '') as - date -) as date, - cast(nullif(timestamp_col, '') as - timestamp -) as timestamp_col, - cast(HKD_special___characters as - float64 -) as HKD_special___characters, - cast(HKD_special___characters_1 as - string -) as HKD_special___characters_1, - cast(NZD as - float64 -) as NZD, - cast(USD as - float64 -) as USD, - cast(column___with__quotes as - string -) as column___with__quotes, - cast(nullif(datetime_tz, '') as - timestamp -) as datetime_tz, - cast(nullif(datetime_no_tz, '') as - datetime -) as datetime_no_tz, - cast(nullif(time_tz, '') as - STRING -) as time_tz, - cast(nullif(time_no_tz, '') as - time -) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(date as - string -), ''), '-', coalesce(cast(timestamp_col as - string -), ''), '-', coalesce(cast(HKD_special___characters as - string -), ''), '-', coalesce(cast(HKD_special___characters_1 as - string -), ''), '-', coalesce(cast(NZD as - string -), ''), '-', coalesce(cast(USD as - string -), ''), '-', coalesce(cast(column___with__quotes as - string -), ''), '-', coalesce(cast(datetime_tz as - string -), ''), '-', coalesce(cast(datetime_no_tz as - string -), ''), '-', coalesce(cast(time_tz as - string -), ''), '-', coalesce(cast(time_no_tz as - string -), '')) as - string -))) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - column___with__quotes, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 5f4138f62093a..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,89 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - OPTIONS() - as -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['currency']") as currency, - json_extract_scalar(_airbyte_data, "$['date']") as date, - json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, - json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, - json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, - json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, - json_extract_scalar(_airbyte_data, "$['USD']") as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - int64 -) as id, - cast(currency as - string -) as currency, - cast(nullif(date, '') as - date -) as date, - cast(nullif(timestamp_col, '') as - timestamp -) as timestamp_col, - cast(HKD_special___characters as - float64 -) as HKD_special___characters, - cast(HKD_special___characters_1 as - string -) as HKD_special___characters_1, - cast(NZD as - float64 -) as NZD, - cast(USD as - float64 -) as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(date as - string -), ''), '-', coalesce(cast(timestamp_col as - string -), ''), '-', coalesce(cast(HKD_special___characters as - string -), ''), '-', coalesce(cast(HKD_special___characters_1 as - string -), ''), '-', coalesce(cast(NZD as - string -), ''), '-', coalesce(cast(USD as - string -), '')) as - string -))) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index f5079fc4f3003..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,83 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg` - OPTIONS() - as -with __dbt__cte__multiple_column_names_conflicts_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id, - json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1, - json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2, - json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3, - json_extract_scalar(_airbyte_data, "$['User@Id']") as User_Id_4, - json_extract_scalar(_airbyte_data, "$['UserId']") as UserId, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias --- multiple_column_names_conflicts -where 1 = 1 - -), __dbt__cte__multiple_column_names_conflicts_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 -select - cast(id as - int64 -) as id, - cast(User_Id as - string -) as User_Id, - cast(user_id_1 as - float64 -) as user_id_1, - cast(User_id_2 as - float64 -) as User_id_2, - cast(user_id_3 as - float64 -) as user_id_3, - cast(User_Id_4 as - string -) as User_Id_4, - cast(UserId as - float64 -) as UserId, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__multiple_column_names_conflicts_ab1 --- multiple_column_names_conflicts -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(User_Id as - string -), ''), '-', coalesce(cast(user_id_1 as - string -), ''), '-', coalesce(cast(User_id_2 as - string -), ''), '-', coalesce(cast(user_id_3 as - string -), ''), '-', coalesce(cast(User_Id_4 as - string -), ''), '-', coalesce(cast(UserId as - string -), '')) as - string -))) as _airbyte_multiple_column_names_conflicts_hashid, - tmp.* -from __dbt__cte__multiple_column_names_conflicts_ab2 tmp --- multiple_column_names_conflicts -where 1 = 1 -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 8ef08eb1d426d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - cluster_by = 
"_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters_1, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index eb02cc4ecf859..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_bigint() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters, - cast(HKD_special___characters_1 as {{ dbt_utils.type_string() }}) as HKD_special___characters_1, - cast(NZD as {{ dbt_utils.type_float() }}) as NZD, - cast(USD as {{ dbt_utils.type_float() }}) as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index ce21bef8c7221..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,178 +0,0 @@ -{{ config( - cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"], - partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - date as _airbyte_start_at, - lag(date) over ( - partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', 
- '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index eb3c93754b6b0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"], - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 61b42d20863c3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,31 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - column___with__quotes, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', 
'_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 45262775f20b1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'date', - 'timestamp_col', - 'HKD_special___characters', - 'HKD_special___characters_1', - 'NZD', - 'USD', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index f51802427655e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_1_prefix_startwith_number - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_multiple_column_names_conflicts - - name: _airbyte_raw_pos_dedup_cdcx - - name: _airbyte_raw_renamed_dedup_cdc_excluded - - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index b86bc98fe997f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to 
parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 09146ddd1c9f8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_float() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast(new_column as {{ dbt_utils.type_float() }}) as new_column, - cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters, - cast(NZD as {{ dbt_utils.type_float() }}) as NZD, - cast(USD as {{ dbt_utils.type_bigint() }}) as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 4f6b80934992c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,178 +0,0 @@ -{{ config( - cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"], - partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, - date as _airbyte_start_at, - lag(date) over ( - partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as 
_airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 96601fc9d2873..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"], - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 84cb4985e8c95..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, - column___with__quotes, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ 
source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index da37e7dc7eaeb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'new_column', - 'date', - 'timestamp_col', - 'HKD_special___characters', - 'NZD', - 'USD', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml deleted file mode 100644 index 6a5d7bdc09a16..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 591dfe0b4c344..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd - - - - when matched then update set - 
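-- This compiled output is dbt's incremental merge strategy on BigQuery: a MERGE keyed on the
-- model's unique_key, where matched rows are updated in place and unmatched rows are inserted.
-- A minimal sketch of the same pattern (dataset, table, and column names are hypothetical):
--
--   merge into my_dataset.target as t
--   using my_dataset.target__dbt_tmp as s
--   on s.unique_key = t.unique_key
--   when matched then update set value = s.value
--   when not matched then insert (unique_key, value) values (s.unique_key, s.value);
--
-- This is also why the SCD model carries a dedicated _airbyte_unique_key_scd: every version of a
-- record needs its own distinct merge key, while _airbyte_unique_key identifies the record itself.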
`_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = DBT_INTERNAL_SOURCE.`USD`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` - - - when not matched then insert - (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - values - (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 0691294c98c3e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key - - - - when matched then update set - `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = DBT_INTERNAL_SOURCE.`USD`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = 
DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` - - - when not matched then insert - (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - values - (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 3d32bbb2838a9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,145 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_emitted_at - OPTIONS() - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['currency']") as currency, - json_extract_scalar(_airbyte_data, "$['date']") as date, - json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, - json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, - json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, - json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, - json_extract_scalar(_airbyte_data, "$['USD']") as USD, - json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes, - json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz, - json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz, - json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz, - json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - int64 -) as id, - cast(currency as - string -) as currency, - cast(nullif(date, '') as - date -) as date, - cast(nullif(timestamp_col, '') as - timestamp -) as timestamp_col, - cast(HKD_special___characters as - float64 -) as HKD_special___characters, - cast(HKD_special___characters_1 as - string -) as HKD_special___characters_1, - cast(NZD as - float64 -) as NZD, - cast(USD as - float64 -) as 
USD, - cast(column___with__quotes as - string -) as column___with__quotes, - cast(nullif(datetime_tz, '') as - timestamp -) as datetime_tz, - cast(nullif(datetime_no_tz, '') as - datetime -) as datetime_no_tz, - cast(nullif(time_tz, '') as - STRING -) as time_tz, - cast(nullif(time_no_tz, '') as - time -) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(date as - string -), ''), '-', coalesce(cast(timestamp_col as - string -), ''), '-', coalesce(cast(HKD_special___characters as - string -), ''), '-', coalesce(cast(HKD_special___characters_1 as - string -), ''), '-', coalesce(cast(NZD as - string -), ''), '-', coalesce(cast(USD as - string -), ''), '-', coalesce(cast(column___with__quotes as - string -), ''), '-', coalesce(cast(datetime_tz as - string -), ''), '-', coalesce(cast(datetime_no_tz as - string -), ''), '-', coalesce(cast(time_tz as - string -), ''), '-', coalesce(cast(time_no_tz as - string -), '')) as - string -))) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, - column___with__quotes, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 5f4138f62093a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,89 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - OPTIONS() - as -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['currency']") as currency, - json_extract_scalar(_airbyte_data, "$['date']") as date, - json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, - json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, - 
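-- The generated staging models follow a two-step extract-then-cast pattern: an *_ab1 CTE pulls
-- each field out of the raw _airbyte_data JSON blob as a string, and the *_ab2 CTE casts those
-- strings to the types declared by the JSON schema. A minimal standalone sketch (the raw table
-- and field names here are hypothetical):
--
--   select
--     cast(json_extract_scalar(_airbyte_data, "$['id']") as int64) as id,
--     cast(nullif(json_extract_scalar(_airbyte_data, "$['date']"), '') as date) as date
--   from my_dataset._airbyte_raw_stream;
--
-- The nullif(..., '') step mirrors the empty_string_to_null macro: an empty string would
-- otherwise make the cast to date/timestamp fail.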
json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, - json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, - json_extract_scalar(_airbyte_data, "$['USD']") as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - int64 -) as id, - cast(currency as - string -) as currency, - cast(nullif(date, '') as - date -) as date, - cast(nullif(timestamp_col, '') as - timestamp -) as timestamp_col, - cast(HKD_special___characters as - float64 -) as HKD_special___characters, - cast(HKD_special___characters_1 as - string -) as HKD_special___characters_1, - cast(NZD as - float64 -) as NZD, - cast(USD as - float64 -) as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(date as - string -), ''), '-', coalesce(cast(timestamp_col as - string -), ''), '-', coalesce(cast(HKD_special___characters as - string -), ''), '-', coalesce(cast(HKD_special___characters_1 as - string -), ''), '-', coalesce(cast(NZD as - string -), ''), '-', coalesce(cast(USD as - string -), '')) as - string -))) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index ac1136c84b7ae..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd - - - - when matched then update set - `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`new_column` = DBT_INTERNAL_SOURCE.`new_column`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = 
DBT_INTERNAL_SOURCE.`USD`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` - - - when not matched then insert - (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - values - (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index a36197a213f4e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` as DBT_INTERNAL_DEST - using ( - select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate__dbt_tmp` - ) as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key - - - - when matched then update set - `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`new_column` = DBT_INTERNAL_SOURCE.`new_column`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = DBT_INTERNAL_SOURCE.`USD`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` - - - when not matched then insert - (`_airbyte_unique_key`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - values - (`_airbyte_unique_key`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) - - - \ No newline at end of 
file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 49688da71ec1a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,117 +0,0 @@ - - - create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate` - partition by timestamp_trunc(_airbyte_emitted_at, day) - cluster by _airbyte_emitted_at - OPTIONS() - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['currency']") as currency, - json_extract_scalar(_airbyte_data, "$['new_column']") as new_column, - json_extract_scalar(_airbyte_data, "$['date']") as date, - json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, - json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, - json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, - json_extract_scalar(_airbyte_data, "$['USD']") as USD, - json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - float64 -) as id, - cast(currency as - string -) as currency, - cast(new_column as - float64 -) as new_column, - cast(nullif(date, '') as - date -) as date, - cast(nullif(timestamp_col, '') as - timestamp -) as timestamp_col, - cast(HKD_special___characters as - float64 -) as HKD_special___characters, - cast(NZD as - float64 -) as NZD, - cast(USD as - float64 -) as USD, - cast(column___with__quotes as - string -) as column___with__quotes, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(new_column as - string -), ''), '-', coalesce(cast(date as - string -), ''), '-', coalesce(cast(timestamp_col as - string -), ''), '-', coalesce(cast(HKD_special___characters as - string -), ''), '-', coalesce(cast(NZD as - string -), ''), '-', coalesce(cast(USD as - string -), ''), '-', coalesce(cast(column___with__quotes as - string -), '')) as - string -))) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp 
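-- Each *_ab3 CTE (like the one above) derives a deterministic row hash: every column is cast to
-- string, NULLs are coalesced to '', the pieces are joined with '-', and the concatenation is
-- md5-hashed. A minimal standalone sketch with hypothetical table and column names:
--
--   select
--     to_hex(md5(concat(
--       coalesce(cast(id as string), ''), '-',
--       coalesce(cast(currency as string), '')
--     ))) as _row_hashid,
--     t.*
--   from my_dataset.my_table as t;
--
-- Because the hash is a pure function of the column values, re-running normalization over the
-- same raw data reproduces the same hashids.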
--- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, - column___with__quotes, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 4b1d0e917e33f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,89 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - OPTIONS() - as -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate -select - json_extract_scalar(_airbyte_data, "$['id']") as id, - json_extract_scalar(_airbyte_data, "$['currency']") as currency, - json_extract_scalar(_airbyte_data, "$['new_column']") as new_column, - json_extract_scalar(_airbyte_data, "$['date']") as date, - json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, - json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, - json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, - json_extract_scalar(_airbyte_data, "$['USD']") as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - float64 -) as id, - cast(currency as - string -) as currency, - cast(new_column as - float64 -) as new_column, - cast(nullif(date, '') as - date -) as date, - cast(nullif(timestamp_col, '') as - timestamp -) as timestamp_col, - cast(HKD_special___characters as - float64 -) as HKD_special___characters, - cast(NZD as - float64 -) as NZD, - cast(USD as - int64 -) as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - CURRENT_TIMESTAMP() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - to_hex(md5(cast(concat(coalesce(cast(id as - string -), ''), '-', coalesce(cast(currency as - string -), ''), '-', coalesce(cast(new_column as - string -), ''), '-', coalesce(cast(date as - string -), ''), '-', 
coalesce(cast(timestamp_col as - string -), ''), '-', coalesce(cast(HKD_special___characters as - string -), ''), '-', coalesce(cast(NZD as - string -), ''), '-', coalesce(cast(USD as - string -), '')) as - string -))) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml deleted file mode 100755 index 4028a91611828..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: true - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: view - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: ignore - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate - exchange_rate: test_normalization._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: 
test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx - 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number - multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization._airbyte_raw_types_testing - types_testing_ab2: test_normalization._airbyte_raw_types_testing - types_testing_stg: test_normalization._airbyte_raw_types_testing - types_testing_scd: test_normalization._airbyte_raw_types_testing - types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 2609c12f32d36..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate -select - JSONExtractRaw(assumeNotNull(_airbyte_data), 'id') as id, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'currency') as currency, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'date') as date, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'timestamp_col') as timestamp_col, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD_special___characters') as HKD_special___characters, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD') as NZD, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'USD') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 07778080d6faa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,33 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab1 -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, - toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' - Float64 -') as "HKD@spéçiäl & characters", - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' - Float64 -') as NZD, - accurateCastOrNull(USD, ' - Float64 -') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index 7dac7b7d793f6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,104 +0,0 @@ - - - - - insert into test_normalization.dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - --- depends_on: ref('dedup_cdc_excluded_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.dedup_cdc_excluded_stg - -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded -), - -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by id - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_active_row_num - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - assumeNotNull(hex(MD5( - - toString(id) - - ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_lsn as _airbyte_start_at, - case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null 
then 1 else 0 end as _airbyte_active_row, - anyOrNull(_ab_cdc_lsn) over ( - partition by id - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data_with_active_row_num -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - assumeNotNull(hex(MD5( - - toString(_airbyte_unique_key) || '~' || - - - toString(_airbyte_start_at) || '~' || - - - toString(_airbyte_emitted_at) || '~' || - - - toString(_ab_cdc_deleted_at) || '~' || - - - toString(_ab_cdc_updated_at) - - ))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index a3527b053dc31..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,108 +0,0 @@ - - - - - insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.dedup_exchange_rate_stg - -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate -), - -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by id, currency, cast(NZD as String) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_active_row_num - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(currency) || '~' || - - - toString(NZD) - - ))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - date as _airbyte_start_at, - case when 
_airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(date) over ( - partition by id, currency, cast(NZD as String) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data_with_active_row_num -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - assumeNotNull(hex(MD5( - - toString(_airbyte_unique_key) || '~' || - - - toString(_airbyte_start_at) || '~' || - - - toString(_airbyte_emitted_at) - - ))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index cf48610f8b82c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,90 +0,0 @@ - - - - - insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg - -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded -), - -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by id - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_active_row_num - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - assumeNotNull(hex(MD5( - - toString(id) - - ))) as _airbyte_unique_key, - id, - _ab_cdc_updated_at, - _ab_cdc_updated_at as _airbyte_start_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(_ab_cdc_updated_at) over ( - partition by id - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, - 
_airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data_with_active_row_num -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - assumeNotNull(hex(MD5( - - toString(_airbyte_unique_key) || '~' || - - - toString(_airbyte_start_at) || '~' || - - - toString(_airbyte_emitted_at) - - ))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - _ab_cdc_updated_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 11d81fef34b9b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - - - insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - --- Final base SQL model --- depends_on: test_normalization.dedup_exchange_rate_scd -select - _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from test_normalization.dedup_exchange_rate_scd --- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index b237171bc7fe8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,23 +0,0 @@ - - - - - insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - --- Final 
base SQL model --- depends_on: test_normalization.renamed_dedup_cdc_excluded_scd -select - _airbyte_unique_key, - id, - _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from test_normalization.renamed_dedup_cdc_excluded_scd --- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded -where 1 = 1 -and _airbyte_active_row = 1 - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index c2be71e63fc94..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - - insert into test_normalization.exchange_rate__dbt_tmp ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") - --- Final base SQL model --- depends_on: _airbyte_test_normalization.exchange_rate_ab3 -select - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - "column___with__quotes", - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from _airbyte_test_normalization.exchange_rate_ab3 --- exchange_rate from test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 9a932053975b7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,41 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp - - as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab2 -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(currency) || '~' || - - - toString(date) || '~' || - - - toString(timestamp_col) || '~' || - - - toString("HKD@spéçiäl & characters") || '~' || - - - toString(HKD_special___characters) || '~' || - - - toString(NZD) || '~' || - - - toString(USD) - - ))) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index 5f10629995793..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,38 +0,0 @@ - - - create view _airbyte_test_normalization.multiple_column_names_conflicts_stg__dbt_tmp - - as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: _airbyte_test_normalization.multiple_column_names_conflicts_ab2 -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString("User Id") || '~' || - - - toString(user_id) || '~' || - - - toString("User id") || '~' || - - - toString("user id") || '~' || - - - toString("User@Id") || '~' || - - - toString(UserId) - - ))) as _airbyte_multiple_co__ames_conflicts_hashid, - tmp.* -from _airbyte_test_normalization.multiple_column_names_conflicts_ab2 tmp --- multiple_column_names_conflicts -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index b0c2c4aa7fa33..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} 
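For reference, the ab1/ab2 model pair above reduces to the following pattern on ClickHouse. This is a minimal sketch and not part of the diff: the inline `_airbyte_data` literal stands in for a row of a hypothetical `_airbyte_raw_*` table, and the column names are illustrative.

    select
        -- ab1 step: pull the raw JSON text for each field (string values keep their quotes)
        JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD') as nzd_raw,
        -- ab2 step, numeric column: accurateCastOrNull yields NULL instead of failing the query
        accurateCastOrNull(JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD'), 'Float64') as nzd,
        -- ab2 step, string column: strip the JSON quotes, then map the literal 'null' to NULL
        nullif(
            accurateCastOrNull(
                trim(BOTH '"' from JSONExtractRaw(assumeNotNull(_airbyte_data), 'currency')),
                'String'),
            'null') as currency
    from (select CAST('{"NZD": 1.42, "currency": "USD"}' AS Nullable(String)) as _airbyte_data);

The quote-trimming only appears for string columns because JSONExtractRaw returns the value exactly as it sits in the JSON blob, quotes included; numeric values come back bare and cast directly.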
- diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 22f82153a5cd8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, - toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }}))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, - accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, - accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 5d3e0d7f6abf0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias --- renamed_dedup_cdc_excluded -where 1 = 
1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql deleted file mode 100644 index c6885e98962eb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} -select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, - accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('renamed_dedup_cdc_excluded_ab1') }} --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index 1570a1b5fddf3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,173 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. 
This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_cdc_excluded')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - alter table {{ this }} delete where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_cdc_excluded_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -), -{% endif %} -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by id - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_active_row_num - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_lsn as _airbyte_start_at, - case when _airbyte_active_row_num = 
1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - anyOrNull(_ab_cdc_lsn) over ( - partition by id - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data_with_active_row_num -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index e29cf7f7906c9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,181 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. 
This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - alter table {{ this }} delete where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_active_row_num - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & 
characters') }}, - HKD_special___characters, - NZD, - USD, - date as _airbyte_start_at, - case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(date) over ( - partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data_with_active_row_num -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 5b8ff875d3a3b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index 4051dd3178c94..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} -select - _airbyte_unique_key, - id, - _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from {{ ref('renamed_dedup_cdc_excluded_scd') }} --- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index c66443b3a1501..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, - {{ quote('column___with__quotes') }}, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index beb710676cb02..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'date', - 'timestamp_col', - quote('HKD@spéçiäl & characters'), - 'HKD_special___characters', - 'NZD', - 'USD', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ 
incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index f51802427655e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_1_prefix_startwith_number - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_multiple_column_names_conflicts - - name: _airbyte_raw_pos_dedup_cdcx - - name: _airbyte_raw_renamed_dedup_cdc_excluded - - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 2609c12f32d36..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate -select - JSONExtractRaw(assumeNotNull(_airbyte_data), 'id') as id, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'currency') as currency, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'date') as date, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'timestamp_col') as timestamp_col, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD_special___characters') as HKD_special___characters, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD') as NZD, - JSONExtractRaw(assumeNotNull(_airbyte_data), 'USD') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 07778080d6faa..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,33 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab1 -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, - toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' - Float64 -') as "HKD@spéçiäl & characters", - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' - Float64 -') as NZD, - accurateCastOrNull(USD, ' - Float64 -') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index a793d7412e483..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,6 +0,0 @@ - - insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from dedup_exchange_rate_scd__dbt_tmp - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index 8f84c4f3c1620..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,6 +0,0 @@ - - insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from renamed_dedup_cdc_excluded_scd__dbt_tmp - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 4a895d6cf480a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,6 +0,0 @@ - - insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from dedup_exchange_rate__dbt_tmp - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index 1b96d3f87152e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,6 +0,0 @@ - - insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from renamed_dedup_cdc_excluded__dbt_tmp - - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index c2be71e63fc94..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - - insert into test_normalization.exchange_rate__dbt_tmp ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") - --- Final base SQL model --- depends_on: _airbyte_test_normalization.exchange_rate_ab3 -select - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - "column___with__quotes", - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from _airbyte_test_normalization.exchange_rate_ab3 --- exchange_rate from test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 9a932053975b7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,41 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp - - as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab2 -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(currency) || '~' || - - - toString(date) || '~' || - - - toString(timestamp_col) || '~' || - - - toString("HKD@spéçiäl & characters") || '~' || - - - toString(HKD_special___characters) || '~' || - - - toString(NZD) || '~' || - - - toString(USD) - - ))) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_nested_streams/dbt_project.yml deleted file mode 100755 index 7631ef356dc92..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_nested_streams/dbt_project.yml +++ /dev/null @@ -1,63 +0,0 @@ -# This file 
is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_simple_streams/dbt_project.yml deleted file mode 100755 index 7631ef356dc92..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,63 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml deleted file mode 100755 index 8ed082f367749..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml +++ /dev/null @@ -1,61 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
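# The folder-level materializations below give each layer of generated models a
# different lifecycle: airbyte_ctes stay ephemeral (inlined as CTEs into their
# consumers), airbyte_incremental models are appended/merged on each run, and
# airbyte_tables / airbyte_views are rebuilt in full. The same effect could be
# set per model with an in-file config block; a minimal sketch for a
# hypothetical model file (the source name is illustrative only):
#
#   {{ config(materialized='incremental', tags=['incremental_tables']) }}
#   select * from {{ source('test_normalization', '_airbyte_raw_example') }}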
-models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -vars: - dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql deleted file mode 100644 index 3ea4e25cfc959..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ /dev/null @@ -1,127 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_scd"','U') is not null - begin - drop table test_normalization."nested_stream_with_co__lting_into_long_names_scd" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" as - --- depends_on: ref(''nested_stream_with_co__lting_into_long_names_stg'') -with - -input_data as ( - select * - from "test_normalization"._airbyte_test_normalization."nested_stream_with_co__lting_into_long_names_stg" - -- nested_stream_with_co__lting_into_long_names from "test_normalization".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_unique_key, - id, - "date", - "partition", - "date" as _airbyte_start_at, - lag("date") over ( - partition by id - order by - "date" desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - "date" desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_strea__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(_airbyte_unique_key as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(_airbyte_start_at as - NVARCHAR(max)), ''''), ''-'', 
coalesce(cast(_airbyte_emitted_at as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - "date", - "partition", - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - '); - - SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" FROM - "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_nested_stream_with_co__lting_into_long_names_scd_cci' - AND object_id=object_id('test_normalization_nested_stream_with_co__lting_into_long_names_scd') - ) - DROP index test_normalization.nested_stream_with_co__lting_into_long_names_scd.test_normalization_nested_stream_with_co__lting_into_long_names_scd_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co__lting_into_long_names_scd_cci - ON test_normalization.nested_stream_with_co__lting_into_long_names_scd - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql deleted file mode 100644 index 7cfc356688fbe..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ /dev/null @@ -1,116 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co___long_names_partition_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co___long_names_partition_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co___long_names_partition"','U') is not null - begin - drop table test_normalization."nested_stream_with_co___long_names_partition" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."nested_stream_with_co___long_names_partition_temp_view" as - -with __dbt__cte__nested_stream_with_co___long_names_partition_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" -select - _airbyte_nested_strea__nto_long_names_hashid, - json_query("partition", ''$."double_array_data"'') as double_array_data, - json_query("partition", ''$."DATA"'') as "DATA", - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as 
_airbyte_normalized_at -from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and "partition" is not null - -), __dbt__cte__nested_stream_with_co___long_names_partition_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co___long_names_partition_ab1 -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - "DATA", - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co___long_names_partition_ab1 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__nested_stream_with_co___long_names_partition_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co___long_names_partition_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(_airbyte_nested_strea__nto_long_names_hashid as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(cast(double_array_data as - NVARCHAR(max)) as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(cast("DATA" as - NVARCHAR(max)) as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_partition_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co___long_names_partition_ab2 tmp --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co___long_names_partition_ab3 -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - "DATA", - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_partition_hashid -from __dbt__cte__nested_stream_with_co___long_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" -where 1 = 1 - - '); - - SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" FROM - "test_normalization".test_normalization."nested_stream_with_co___long_names_partition_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co___long_names_partition_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co___long_names_partition_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_nested_stream_with_co___long_names_partition_cci' - AND object_id=object_id('test_normalization_nested_stream_with_co___long_names_partition') - ) - DROP index test_normalization.nested_stream_with_co___long_names_partition.test_normalization_nested_stream_with_co___long_names_partition_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co___long_names_partition_cci - ON test_normalization.nested_stream_with_co___long_names_partition - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql deleted file mode 100644 index d5b645c4e07e5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ /dev/null @@ -1,121 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co___names_partition_data_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co___names_partition_data_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co___names_partition_data"','U') is not null - begin - drop table test_normalization."nested_stream_with_co___names_partition_data" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."nested_stream_with_co___names_partition_data_temp_view" as - -with __dbt__cte__nested_stream_with_co___names_partition_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" - -select - _airbyte_partition_hashid, - json_value( - "DATA".value, ''$."currency"'') as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA - - CROSS APPLY ( - SELECT [value] = CASE - WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value])) - WHEN [type] = 5 THEN [value] - END - FROM OPENJSON("DATA") - ) AS "DATA" -where 1 = 1 -and "DATA" is not null - -), __dbt__cte__nested_stream_with_co___names_partition_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co___names_partition_data_ab1 -select - _airbyte_partition_hashid, - cast(currency as - NVARCHAR(max)) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co___names_partition_data_ab1 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__nested_stream_with_co___names_partition_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co___names_partition_data_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(_airbyte_partition_hashid as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co___names_partition_data_ab2 tmp --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co___names_partition_data_ab3 -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, 
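-- _airbyte_data_hashid below was built in the _ab3 CTE with the HashBytes
-- pattern used throughout these models: cast each input to NVARCHAR(max),
-- coalesce nulls to empty strings, concat with '-' separators, md5 the result.
-- A minimal standalone sketch of that pattern, with a hypothetical table
-- t(col_a, col_b) for illustration:
--
--   select convert(varchar(32), HashBytes('md5', coalesce(cast(
--            concat(coalesce(cast(col_a as NVARCHAR(max)), ''), '-',
--                   coalesce(cast(col_b as NVARCHAR(max)), ''))
--          as NVARCHAR(max)), '')), 2) as row_hashid
--   from t;
--
-- The trailing style argument 2 in convert() renders the varbinary hash as
-- hex without a '0x' prefix, yielding a 32-character md5 string.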
- _airbyte_data_hashid -from __dbt__cte__nested_stream_with_co___names_partition_data_ab3 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" -where 1 = 1 - - '); - - SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co___names_partition_data" FROM - "test_normalization".test_normalization."nested_stream_with_co___names_partition_data_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co___names_partition_data_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co___names_partition_data_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_nested_stream_with_co___names_partition_data_cci' - AND object_id=object_id('test_normalization_nested_stream_with_co___names_partition_data') - ) - DROP index test_normalization.nested_stream_with_co___names_partition_data.test_normalization_nested_stream_with_co___names_partition_data_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co___names_partition_data_cci - ON test_normalization.nested_stream_with_co___names_partition_data - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql deleted file mode 100644 index 6cb8120f52ca2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ /dev/null @@ -1,121 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__ion_double_array_data_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__ion_double_array_data"','U') is not null - begin - drop table test_normalization."nested_stream_with_co__ion_double_array_data" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" as - -with __dbt__cte__nested_stream_with_co__ion_double_array_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" - -select - _airbyte_partition_hashid, - json_value( - double_array_data.value, ''$."id"'') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data - - CROSS APPLY ( - SELECT [value] = CASE - WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value])) - 
WHEN [type] = 5 THEN [value] - END - FROM OPENJSON(double_array_data) - ) AS double_array_data -where 1 = 1 -and double_array_data is not null - -), __dbt__cte__nested_stream_with_co__ion_double_array_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co__ion_double_array_data_ab1 -select - _airbyte_partition_hashid, - cast(id as - NVARCHAR(max)) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co__ion_double_array_data_ab1 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__nested_stream_with_co__ion_double_array_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co__ion_double_array_data_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(_airbyte_partition_hashid as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(id as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_double_array_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co__ion_double_array_data_ab2 tmp --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co__ion_double_array_data_ab3 -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from __dbt__cte__nested_stream_with_co__ion_double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" -where 1 = 1 - - '); - - SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co__ion_double_array_data" FROM - "test_normalization".test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__ion_double_array_data_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_nested_stream_with_co__ion_double_array_data_cci' - AND object_id=object_id('test_normalization_nested_stream_with_co__ion_double_array_data') - ) - DROP index test_normalization.nested_stream_with_co__ion_double_array_data.test_normalization_nested_stream_with_co__ion_double_array_data_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co__ion_double_array_data_cci - ON test_normalization.nested_stream_with_co__ion_double_array_data - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql deleted file 
mode 100644 index 492b941921216..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ /dev/null @@ -1,66 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names"','U') is not null - begin - drop table test_normalization."nested_stream_with_co__lting_into_long_names" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" as - --- Final base SQL model --- depends_on: "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" -select - _airbyte_unique_key, - id, - "date", - "partition", - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" --- nested_stream_with_co__lting_into_long_names from "test_normalization".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - '); - - SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names" FROM - "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_temp_view"','V') is not null - begin - drop view test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_nested_stream_with_co__lting_into_long_names_cci' - AND object_id=object_id('test_normalization_nested_stream_with_co__lting_into_long_names') - ) - DROP index test_normalization.nested_stream_with_co__lting_into_long_names.test_normalization_nested_stream_with_co__lting_into_long_names_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co__lting_into_long_names_cci - ON test_normalization.nested_stream_with_co__lting_into_long_names - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___long_names_partition_ab1.sql deleted file mode 100644 index 35ebff8092c70..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___long_names_partition_ab1.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract 
into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} -select - _airbyte_nested_strea__nto_long_names_hashid, - {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, - {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___names_partition_data_ab1.sql deleted file mode 100644 index cdf1151ee10d7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___names_partition_data_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', adapter.quote('DATA')) }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -{{ cross_join_unnest('partition', adapter.quote('DATA')) }} -where 1 = 1 -and {{ adapter.quote('DATA') }} is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__ion_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__ion_double_array_data_ab1.sql deleted file mode 100644 index a8ca4bbb7d40f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__ion_double_array_data_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by 
the JSON Schema --- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', 'double_array_data') }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -{{ cross_join_unnest('partition', 'double_array_data') }} -where 1 = 1 -and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab1.sql deleted file mode 100644 index 3274f1fabcc1a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab1.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias --- nested_stream_with_co__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab2.sql deleted file mode 100644 index b810108779e79..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab2.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the 
JSON schema type --- depends_on: {{ ref('nested_stream_with_co__lting_into_long_names_ab1') }} -select - cast(id as {{ dbt_utils.type_string() }}) as id, - cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }}, - cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co__lting_into_long_names_ab1') }} --- nested_stream_with_co__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql deleted file mode 100644 index b4683a3ea301c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ /dev/null @@ -1,160 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_co__lting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.nested_stream_with_co__lting_into_long_names_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('nested_stream_with_co__lting_into_long_names_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_co__lting_into_long_names_stg') }} - -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('nested_stream_with_co__lting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_co__lting_into_long_names_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('nested_stream_with_co__lting_into_long_names_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('nested_stream_with_co__lting_into_long_names_stg') }} - -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by id - order by - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_strea__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - 
_airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql deleted file mode 100644 index 33830638af517..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co___long_names_partition_ab3') }} -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - {{ adapter.quote('DATA') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_partition_hashid -from {{ ref('nested_stream_with_co___long_names_partition_ab3') }} --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql deleted file mode 100644 index 3a0dedfa076e5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co___names_partition_data_ab3') }} -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_data_hashid -from {{ ref('nested_stream_with_co___names_partition_data_ab3') }} --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_co___long_names_partition') }} -where 1 = 1 -{{ 
incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql deleted file mode 100644 index 74323fef10e6e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co__ion_double_array_data_ab3') }} -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from {{ ref('nested_stream_with_co__ion_double_array_data_ab3') }} --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_co___long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql deleted file mode 100644 index aa2caf12165ce..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} -select - _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} --- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml deleted file mode 100644 index 92fa4c9a2580e..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml +++ /dev/null @@ -1,22 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_conflict_stream_array - - name: _airbyte_raw_conflict_stream_name - - name: _airbyte_raw_conflict_stream_scalar - - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - - name: _airbyte_raw_some_stream_that_was_empty - - name: _airbyte_raw_unnest_alias -- name: test_normalization_namespace - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql deleted file mode 100644 index 22684ecf70c29..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ /dev/null @@ -1,17 +0,0 @@ - - - - delete from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names_scd__dbt_tmp" - ); - - - insert into "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid" - from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names_scd__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql deleted file mode 100644 index 8eacd04b88489..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - - - insert into 
"test_normalization".test_normalization."nested_stream_with_co___long_names_partition" ("_airbyte_nested_strea__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") - ( - select "_airbyte_nested_strea__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" - from "test_normalization".test_normalization."#nested_stream_with_co___long_names_partition__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql deleted file mode 100644 index 3d7b97c0c96bf..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - - - insert into "test_normalization".test_normalization."nested_stream_with_co___names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") - ( - select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" - from "test_normalization".test_normalization."#nested_stream_with_co___names_partition_data__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql deleted file mode 100644 index d3c525c77c34d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - - - insert into "test_normalization".test_normalization."nested_stream_with_co__ion_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") - ( - select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" - from "test_normalization".test_normalization."#nested_stream_with_co__ion_double_array_data__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql deleted file mode 100644 index befc1e8d025db..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ /dev/null @@ -1,17 +0,0 @@ - - - - delete from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names__dbt_tmp" - ); - - - insert into "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid") - ( - select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid" - from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml deleted file mode 100755 index 8ed082f367749..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,61 +0,0 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - -quoting: - database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted - schema: false - identifier: true - -# You can define configurations for models in the `model-paths` directory here. 
-# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view - -vars: - dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 36e1fe2b4afb4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,141 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."dedup_exchange_rate_scd_temp_view"','V') is not null - begin - drop view test_normalization."dedup_exchange_rate_scd_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."dedup_exchange_rate_scd"','U') is not null - begin - drop table test_normalization."dedup_exchange_rate_scd" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."dedup_exchange_rate_scd_temp_view" as - --- depends_on: ref(''dedup_exchange_rate_stg'') -with - -input_data as ( - select * - from "test_normalization"._airbyte_test_normalization."dedup_exchange_rate_stg" - -- dedup_exchange_rate from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_unique_key, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "date" as _airbyte_start_at, - lag("date") over ( - partition by id, currency, cast(nzd as - NVARCHAR(max)) - order by - "date" desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as - NVARCHAR(max)) - order by - "date" desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(_airbyte_unique_key as - NVARCHAR(max)), ''''), ''-'', 
coalesce(cast(_airbyte_start_at as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(_airbyte_emitted_at as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - '); - - SELECT * INTO "test_normalization".test_normalization."dedup_exchange_rate_scd" FROM - "test_normalization".test_normalization."dedup_exchange_rate_scd_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."dedup_exchange_rate_scd_temp_view"','V') is not null - begin - drop view test_normalization."dedup_exchange_rate_scd_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_dedup_exchange_rate_scd_cci' - AND object_id=object_id('test_normalization_dedup_exchange_rate_scd') - ) - DROP index test_normalization.dedup_exchange_rate_scd.test_normalization_dedup_exchange_rate_scd_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_dedup_exchange_rate_scd_cci - ON test_normalization.dedup_exchange_rate_scd - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index b1600851cf4bb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - - USE [test_normalization]; - if object_id ('test_normalization."dedup_exchange_rate_temp_view"','V') is not null - begin - drop view test_normalization."dedup_exchange_rate_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."dedup_exchange_rate"','U') is not null - begin - drop table test_normalization."dedup_exchange_rate" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."dedup_exchange_rate_temp_view" as - --- Final base SQL model --- depends_on: "test_normalization".test_normalization."dedup_exchange_rate_scd" -select - _airbyte_unique_key, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from "test_normalization".test_normalization."dedup_exchange_rate_scd" --- dedup_exchange_rate from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - '); - - SELECT * INTO "test_normalization".test_normalization."dedup_exchange_rate" FROM - "test_normalization".test_normalization."dedup_exchange_rate_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."dedup_exchange_rate_temp_view"','V') is not null 
- begin - drop view test_normalization."dedup_exchange_rate_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_dedup_exchange_rate_cci' - AND object_id=object_id('test_normalization_dedup_exchange_rate') - ) - DROP index test_normalization.dedup_exchange_rate.test_normalization_dedup_exchange_rate_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_dedup_exchange_rate_cci - ON test_normalization.dedup_exchange_rate - - - - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 830e76c6f0ef8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,159 +0,0 @@ - - - USE [test_normalization]; - if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null - begin - drop view test_normalization."exchange_rate__dbt_tmp_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."exchange_rate__dbt_tmp"','U') is not null - begin - drop table test_normalization."exchange_rate__dbt_tmp" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."exchange_rate__dbt_tmp_temp_view" as - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization._airbyte_raw_exchange_rate -select - json_value(_airbyte_data, ''$."id"'') as id, - json_value(_airbyte_data, ''$."currency"'') as currency, - json_value(_airbyte_data, ''$."date"'') as "date", - json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, - json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", - json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, - json_value(_airbyte_data, ''$."NZD"'') as nzd, - json_value(_airbyte_data, ''$."USD"'') as usd, - json_value(_airbyte_data, ''$."column`_''''with\"_quotes"'') as "column`_''with""_quotes", - json_value(_airbyte_data, ''$."datetime_tz"'') as datetime_tz, - json_value(_airbyte_data, ''$."datetime_no_tz"'') as datetime_no_tz, - json_value(_airbyte_data, ''$."time_tz"'') as time_tz, - json_value(_airbyte_data, ''$."time_no_tz"'') as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as - NVARCHAR(max)) as currency, - try_parse(nullif("date", '''') as date) as "date", - try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - NVARCHAR(max)) 
as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_''with""_quotes" as - NVARCHAR(max)) as "column`_''with""_quotes", - try_parse(nullif(datetime_tz, '''') as datetimeoffset) as datetime_tz, - try_parse(nullif(datetime_no_tz, '''') as datetime2) as datetime_no_tz, - cast(nullif(time_tz, '''') as NVARCHAR(max)) as time_tz, - cast(nullif(time_no_tz, '''') as - time -) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("column`_''with""_quotes" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_tz as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_no_tz as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_tz as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_no_tz as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "column`_''with""_quotes", - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "test_normalization".test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - '); - - SELECT * INTO "test_normalization".test_normalization."exchange_rate__dbt_tmp" FROM - "test_normalization".test_normalization."exchange_rate__dbt_tmp_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null - begin - drop view test_normalization."exchange_rate__dbt_tmp_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_exchange_rate__dbt_tmp_cci' - AND object_id=object_id('test_normalization_exchange_rate__dbt_tmp') - ) - DROP index test_normalization.exchange_rate__dbt_tmp.test_normalization_exchange_rate__dbt_tmp_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_exchange_rate__dbt_tmp_cci - ON test_normalization.exchange_rate__dbt_tmp - - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql 
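(The compiled mssql models deleted above all share two generated idioms: table materialization is emulated with a temp view plus SELECT ... INTO followed by a clustered columnstore index rebuild, and every `_airbyte_*_hashid` surrogate key is an MD5 of the record's columns, cast to NVARCHAR(max) and concatenated with '-' separators. A minimal, self-contained sketch of the hashing idiom follows; the table and column names `sample_rows`, `id`, and `currency` are hypothetical, not taken from the fixtures.)

    -- Hypothetical sketch of the _airbyte_*_hashid idiom in T-SQL:
    -- cast each column to NVARCHAR(max), concat with '-' separators,
    -- MD5-hash with HashBytes, and render the 16-byte result as a
    -- 32-character hex string (CONVERT style 2 drops the '0x' prefix).
    select
        convert(varchar(32),
                HashBytes('md5',
                    coalesce(cast(concat(
                        coalesce(cast(id as NVARCHAR(max)), ''), '-',
                        coalesce(cast(currency as NVARCHAR(max)), '')
                    ) as NVARCHAR(max)), '')),
                2) as _airbyte_sample_hashid,
        tmp.*
    from sample_rows tmp;
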
deleted file mode 100644 index ed018a2680b4c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,77 +0,0 @@ -USE [test_normalization]; - execute('create view _airbyte_test_normalization."dedup_exchange_rate_stg__dbt_tmp" as - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate -select - json_value(_airbyte_data, ''$."id"'') as id, - json_value(_airbyte_data, ''$."currency"'') as currency, - json_value(_airbyte_data, ''$."date"'') as "date", - json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, - json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", - json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, - json_value(_airbyte_data, ''$."NZD"'') as nzd, - json_value(_airbyte_data, ''$."USD"'') as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as - NVARCHAR(max)) as currency, - try_parse(nullif("date", '''') as date) as "date", - try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - NVARCHAR(max)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - '); - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index 
b15582c5ec555..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,76 +0,0 @@ -USE [test_normalization]; - execute('create view _airbyte_test_normalization."multiple_column_names_conflicts_stg__dbt_tmp" as - -with __dbt__cte__multiple_column_names_conflicts_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization._airbyte_raw_multiple_column_names_conflicts -select - json_value(_airbyte_data, ''$."id"'') as id, - json_value(_airbyte_data, ''$."User Id"'') as "User Id", - json_value(_airbyte_data, ''$."user_id"'') as user_id, - json_value(_airbyte_data, ''$."User id"'') as "User id_1", - json_value(_airbyte_data, ''$."user id"'') as "user id_2", - json_value(_airbyte_data, ''$."User@Id"'') as "User@Id", - json_value(_airbyte_data, ''$."UserId"'') as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias --- multiple_column_names_conflicts -where 1 = 1 - -), __dbt__cte__multiple_column_names_conflicts_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 -select - cast(id as - bigint -) as id, - cast("User Id" as - NVARCHAR(max)) as "User Id", - cast(user_id as - float -) as user_id, - cast("User id_1" as - float -) as "User id_1", - cast("user id_2" as - float -) as "user id_2", - cast("User@Id" as - NVARCHAR(max)) as "User@Id", - cast(userid as - float -) as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__multiple_column_names_conflicts_ab1 --- multiple_column_names_conflicts -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("User Id" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(user_id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("User id_1" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("user id_2" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("User@Id" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(userid as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_multiple_col__ames_conflicts_hashid, - tmp.* -from __dbt__cte__multiple_column_names_conflicts_ab2 tmp --- multiple_column_names_conflicts -where 1 = 1 - - '); - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 670db0869ae22..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 556ece9aaaeaf..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_bigint() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - try_parse({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - try_parse({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 8d96481142613..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,174 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, 
_airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index dd4432bd60a5e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 8a74de4c15332..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 86ec2c9e8b1b7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'hkd_special___characters', - 'nzd', - 'usd', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index 97bf0d05cbd40..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,15 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_1_prefix_startwith_number - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_multiple_column_names_conflicts - - name: _airbyte_raw_pos_dedup_cdcx - - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 1b22ba3f4a729..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,17 +0,0 @@ - - - - delete from "test_normalization".test_normalization."dedup_exchange_rate_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "test_normalization".test_normalization."#dedup_exchange_rate_scd__dbt_tmp" - ); - - - insert into "test_normalization".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", 
"_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "test_normalization".test_normalization."#dedup_exchange_rate_scd__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 1315385a34459..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,17 +0,0 @@ - - - - delete from "test_normalization".test_normalization."dedup_exchange_rate" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "test_normalization".test_normalization."#dedup_exchange_rate__dbt_tmp" - ); - - - insert into "test_normalization".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "test_normalization".test_normalization."#dedup_exchange_rate__dbt_tmp" - ); - - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 830e76c6f0ef8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,159 +0,0 @@ - - - USE [test_normalization]; - if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null - begin - drop view test_normalization."exchange_rate__dbt_tmp_temp_view" - end - - - - - USE [test_normalization]; - if object_id ('test_normalization."exchange_rate__dbt_tmp"','U') is not null - begin - drop table test_normalization."exchange_rate__dbt_tmp" - end - - - USE [test_normalization]; - EXEC('create view test_normalization."exchange_rate__dbt_tmp_temp_view" as - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON 
Schema --- depends_on: "test_normalization".test_normalization._airbyte_raw_exchange_rate -select - json_value(_airbyte_data, ''$."id"'') as id, - json_value(_airbyte_data, ''$."currency"'') as currency, - json_value(_airbyte_data, ''$."date"'') as "date", - json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, - json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", - json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, - json_value(_airbyte_data, ''$."NZD"'') as nzd, - json_value(_airbyte_data, ''$."USD"'') as usd, - json_value(_airbyte_data, ''$."column`_''''with\"_quotes"'') as "column`_''with""_quotes", - json_value(_airbyte_data, ''$."datetime_tz"'') as datetime_tz, - json_value(_airbyte_data, ''$."datetime_no_tz"'') as datetime_no_tz, - json_value(_airbyte_data, ''$."time_tz"'') as time_tz, - json_value(_airbyte_data, ''$."time_no_tz"'') as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as - NVARCHAR(max)) as currency, - try_parse(nullif("date", '''') as date) as "date", - try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - NVARCHAR(max)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_''with""_quotes" as - NVARCHAR(max)) as "column`_''with""_quotes", - try_parse(nullif(datetime_tz, '''') as datetimeoffset) as datetime_tz, - try_parse(nullif(datetime_no_tz, '''') as datetime2) as datetime_no_tz, - cast(nullif(time_tz, '''') as NVARCHAR(max)) as time_tz, - cast(nullif(time_no_tz, '''') as - time -) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("column`_''with""_quotes" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_tz as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_no_tz as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_tz as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_no_tz as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 
-select - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "column`_''with""_quotes", - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "test_normalization".test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - '); - - SELECT * INTO "test_normalization".test_normalization."exchange_rate__dbt_tmp" FROM - "test_normalization".test_normalization."exchange_rate__dbt_tmp_temp_view" - - - - USE [test_normalization]; - if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null - begin - drop view test_normalization."exchange_rate__dbt_tmp_temp_view" - end - - - use [test_normalization]; - if EXISTS ( - SELECT * FROM - sys.indexes WHERE name = 'test_normalization_exchange_rate__dbt_tmp_cci' - AND object_id=object_id('test_normalization_exchange_rate__dbt_tmp') - ) - DROP index test_normalization.exchange_rate__dbt_tmp.test_normalization_exchange_rate__dbt_tmp_cci - CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_exchange_rate__dbt_tmp_cci - ON test_normalization.exchange_rate__dbt_tmp - - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index ed018a2680b4c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,77 +0,0 @@ -USE [test_normalization]; - execute('create view _airbyte_test_normalization."dedup_exchange_rate_stg__dbt_tmp" as - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate -select - json_value(_airbyte_data, ''$."id"'') as id, - json_value(_airbyte_data, ''$."currency"'') as currency, - json_value(_airbyte_data, ''$."date"'') as "date", - json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, - json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", - json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, - json_value(_airbyte_data, ''$."NZD"'') as nzd, - json_value(_airbyte_data, ''$."USD"'') as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as - NVARCHAR(max)) as currency, - try_parse(nullif("date", '''') as date) as "date", - try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float 
-) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - NVARCHAR(max)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - SYSDATETIME() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - convert(varchar(32), HashBytes(''md5'', coalesce(cast( - - - - concat(concat(coalesce(cast(id as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as - NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as - NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as - NVARCHAR(max)), ''''),''''), '''') as - NVARCHAR(max)), '''')), 2) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - '); - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml deleted file mode 100755 index f187620c7c7c9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml +++ /dev/null @@ -1,121 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: table - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -vars: - dbt_utils_dispatch_list: - - airbyte_utils - json_column: _airbyte_data - models_to_source: - nested_stream_with_co_1g_into_long_names_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_1g_into_long_names_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_1g_into_long_names_stg: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_1g_into_long_names_scd: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co__lting_into_long_names: test_normalization._airbyte_raw_nested_s__lting_into_long_names - non_nested_stream_wit_1g_into_long_names_ab1: test_normalization._airbyte_raw_non_nest__lting_into_long_names - non_nested_stream_wit_1g_into_long_names_ab2: test_normalization._airbyte_raw_non_nest__lting_into_long_names - non_nested_stream_wit_1g_into_long_names_ab3: test_normalization._airbyte_raw_non_nest__lting_into_long_names - non_nested_stream_wit__lting_into_long_names: 
test_normalization._airbyte_raw_non_nest__lting_into_long_names - some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty - simple_stream_with_na_1g_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names - simple_stream_with_na_1g_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names - simple_stream_with_na_1g_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names - simple_stream_with_na__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names - conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array - unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias: test_normalization._airbyte_raw_unnest_alias - arrays_ab1: test_normalization._airbyte_raw_arrays - arrays_ab2: test_normalization._airbyte_raw_arrays - arrays_ab3: test_normalization._airbyte_raw_arrays - arrays: test_normalization._airbyte_raw_arrays - nested_stream_with_co_2g_names_partition_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_2g_names_partition_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_2g_names_partition_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co___long_names_partition: test_normalization._airbyte_raw_nested_s__lting_into_long_names - conflict_stream_name__2flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name__2flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name__2flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias - 
unnest_alias_children: test_normalization._airbyte_raw_unnest_alias - arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent: test_normalization._airbyte_raw_arrays - nested_stream_with_co_3double_array_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_3double_array_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_3double_array_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co__ion_double_array_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_3es_partition_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_3es_partition_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_3es_partition_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co___names_partition_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names - conflict_stream_name__3flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name__3flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name__3flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name____conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_4mn___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_4mn___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_4mn___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql deleted file mode 100644 index e5f3e4859deba..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ /dev/null @@ -1,74 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` - as ( - --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` - -- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names -), - -scd_data as ( - -- 
SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, - id, - `date`, - `partition`, - `date` as _airbyte_start_at, - lag(`date`) over ( - partition by id - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_strea__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - `date`, - `partition`, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql deleted file mode 100644 index 9d4975c21dac1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co___long_names_partition__dbt_tmp` - as ( - -with __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.`nested_stream_with_co_1g_into_long_names_scd` -select - _airbyte_nested_strea__nto_long_names_hashid, - json_extract(`partition`, - '$."double_array_data"') as double_array_data, - json_extract(`partition`, - '$."DATA"') as `DATA`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and `partition` is not null - -), __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 -select - _airbyte_nested_strea__nto_long_names_hashid, - 
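For reference: the scd_data CTE in the deleted model above is the generic Type 2 Slowly Changing Dimension pattern, built entirely from window functions. A minimal sketch of the same idea against a hypothetical MySQL table customers(id, updated_at) — the table and column names here are invented for illustration, not taken from this diff:

-- One output row per version of each record. lag() over a descending sort
-- yields the next-newer version's cursor as this version's end date, and
-- row_number() = 1 marks the single latest (active) version per key.
select
    id,
    updated_at as _start_at,
    lag(updated_at) over (
        partition by id
        order by updated_at is null asc, updated_at desc
    ) as _end_at,
    case when row_number() over (
        partition by id
        order by updated_at is null asc, updated_at desc
    ) = 1 then 1 else 0 end as _active_row
from customers;

The "is null asc" term mirrors the deleted code: rows with a null cursor sort last, so they are never picked as the active row ahead of rows with a real cursor.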
double_array_data, - `DATA`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 -select - md5(cast(concat(coalesce(cast(_airbyte_nested_strea__nto_long_names_hashid as char), ''), '-', coalesce(cast(double_array_data as char), ''), '-', coalesce(cast(`DATA` as char), '')) as char)) as _airbyte_partition_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 tmp --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - `DATA`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_partition_hashid -from __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` -where 1 = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql deleted file mode 100644 index e68283420cfdc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ /dev/null @@ -1,113 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co___names_partition_data__dbt_tmp` - as ( - -with __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` -with numbers as ( - - - - - with p as ( - select 0 as generated_number union all select 1 - ), unioned as ( - - select - - - p0.generated_number * power(2, 0) - - - + 1 - as generated_number - - from - - - p as p0 - - - - ) - - select * - from unioned - where generated_number <= 1 - order by generated_number - - - ), - joined as ( - select - _airbyte_partition_hashid as _airbyte_hashid, - - json_extract(`DATA`, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data - from test_normalization.`nested_stream_with_co___long_names_partition` - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.`DATA` - where numbers.generated_number <= json_length(`DATA`) - ) -select - _airbyte_partition_hashid, - json_value(_airbyte_nested_data, - '$."currency"' RETURNING CHAR) as currency, 
- _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization.`nested_stream_with_co___long_names_partition` as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -left join joined on _airbyte_partition_hashid = joined._airbyte_hashid -where 1 = 1 -and `DATA` is not null - -), __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 -select - _airbyte_partition_hashid, - cast(currency as char(1024)) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 -select - md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(currency as char), '')) as char)) as _airbyte_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 tmp --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_data_hashid -from __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from test_normalization.`nested_stream_with_co___long_names_partition` -where 1 = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql deleted file mode 100644 index 4b276edcc316f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ /dev/null @@ -1,113 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co__ion_double_array_data__dbt_tmp` - as ( - -with __dbt__cte__nested_stream_with_co_3double_array_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` -with numbers as ( - - - - - with p as ( - select 0 as generated_number union all select 1 - ), unioned as ( - - select - - - p0.generated_number * power(2, 0) - - - + 1 - as generated_number - - from - - - p as p0 - - - - ) - - select * - from unioned - where generated_number <= 2 - order by generated_number - - - ), - joined as ( - select - _airbyte_partition_hashid as _airbyte_hashid, - - 
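The numbers/joined CTEs visible above are how normalization unnested JSON arrays on MySQL, a standard workaround for the lack of an UNNEST operator: generate an integer sequence, cross join it against the table, and keep only the indexes that actually exist in each row's array. A condensed sketch over a hypothetical table t(pk, arr), where arr holds a JSON array (table and column names are illustrative):

with numbers as (
    -- must cover the longest array in the column; the generated models size
    -- this with a power-of-two union chain, but the idea is the same
    select 1 as n union all select 2 union all select 3
)
select
    t.pk,
    json_extract(t.arr, concat('$[', numbers.n - 1, ']')) as element
from t
cross join numbers
where numbers.n <= json_length(t.arr);  -- one output row per array item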
json_extract(double_array_data, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data - from test_normalization.`nested_stream_with_co___long_names_partition` - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.double_array_data - where numbers.generated_number <= json_length(double_array_data) - ) -select - _airbyte_partition_hashid, - json_value(_airbyte_nested_data, - '$."id"' RETURNING CHAR) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization.`nested_stream_with_co___long_names_partition` as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -left join joined on _airbyte_partition_hashid = joined._airbyte_hashid -where 1 = 1 -and double_array_data is not null - -), __dbt__cte__nested_stream_with_co_3double_array_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab1 -select - _airbyte_partition_hashid, - cast(id as char(1024)) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co_3double_array_data_ab1 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__nested_stream_with_co_3double_array_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab2 -select - md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(id as char), '')) as char)) as _airbyte_double_array_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co_3double_array_data_ab2 tmp --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab3 -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from __dbt__cte__nested_stream_with_co_3double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from test_normalization.`nested_stream_with_co___long_names_partition` -where 1 = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql deleted file mode 100644 index 8be6ef88d622a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ /dev/null @@ -1,25 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co__lting_into_long_names__dbt_tmp` - 
as ( - --- Final base SQL model --- depends_on: test_normalization.`nested_stream_with_co_1g_into_long_names_scd` -select - _airbyte_unique_key, - id, - `date`, - `partition`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` --- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql deleted file mode 100644 index d638e7a898ff3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} as table_alias --- nested_stream_with_co__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql deleted file mode 100644 index a86a84248a87c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} -select - cast(id as {{ dbt_utils.type_string() }}(1024)) as id, - cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}(1024)) as {{ adapter.quote('date') }}, - cast({{ adapter.quote('partition') }} as {{ 
type_json() }}) as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} --- nested_stream_with_co__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql deleted file mode 100644 index 427a929211b27..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} -select - _airbyte_nested_strea__nto_long_names_hashid, - {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, - {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql deleted file mode 100644 index a8ca4bbb7d40f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', 'double_array_data') }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias --- double_array_data at 
nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -{{ cross_join_unnest('partition', 'double_array_data') }} -where 1 = 1 -and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql deleted file mode 100644 index cdf1151ee10d7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', adapter.quote('DATA')) }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -{{ cross_join_unnest('partition', adapter.quote('DATA')) }} -where 1 = 1 -and {{ adapter.quote('DATA') }} is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql deleted file mode 100644 index 9ffb6bd5558cc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ /dev/null @@ -1,162 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_co__lting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} - -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('nested_stream_with_co_1g_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from new_data - union all - select {{ 
dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} - -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by id - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_strea__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql deleted file mode 100644 index 0c8adc779de9f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - {{ adapter.quote('DATA') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_partition_hashid -from {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} -where 1 = 1 -{{ 
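A note on the incremental machinery deleted above: under is_incremental(), the SCD model rebuilds windows only for affected primary keys, by unioning the newly arrived rows (new_data) with the previously active SCD rows for those same keys (previous_active_scd_data). The incremental_clause macro referenced throughout belongs to base-normalization's dbt package; its effect is approximately the guard sketched below — a paraphrase for orientation, not the macro's actual source:

{% if is_incremental() %}
  -- only process rows newer than the newest row already in the target table
  and coalesce(
      cast(_airbyte_emitted_at as datetime)
          >= (select max(cast(_airbyte_emitted_at as datetime)) from {{ this }}),
      true)
{% endif %}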
incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql deleted file mode 100644 index 92e44abc92988..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_data_hashid -from {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_co___long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql deleted file mode 100644 index 6a17d6258b3e6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co_3double_array_data_ab3') }} -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from {{ ref('nested_stream_with_co_3double_array_data_ab3') }} --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_co___long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql deleted file mode 100644 index 0ea84390902e9..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} -select - _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} --- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/sources.yml deleted file mode 100644 index 50def309c8c44..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/sources.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_arrays - - name: _airbyte_raw_conflict_stream_array - - name: _airbyte_raw_conflict_stream_name - - name: _airbyte_raw_conflict_stream_scalar - - name: _airbyte_raw_nested_s__lting_into_long_names - - name: _airbyte_raw_non_nest__lting_into_long_names - - name: _airbyte_raw_some_stream_that_was_empty - - name: _airbyte_raw_unnest_alias -- name: test_normalization_namespace - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_simple_s__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql deleted file mode 100644 index e5f3e4859deba..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ /dev/null @@ -1,74 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` - as ( - --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` - -- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - 
md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, - id, - `date`, - `partition`, - `date` as _airbyte_start_at, - lag(`date`) over ( - partition by id - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_strea__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - `date`, - `partition`, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql deleted file mode 100644 index 9d4975c21dac1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co___long_names_partition__dbt_tmp` - as ( - -with __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.`nested_stream_with_co_1g_into_long_names_scd` -select - _airbyte_nested_strea__nto_long_names_hashid, - json_extract(`partition`, - '$."double_array_data"') as double_array_data, - json_extract(`partition`, - '$."DATA"') as `DATA`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and `partition` is not null - -), __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - `DATA`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from 
__dbt__cte__nested_stream_with_co_2g_names_partition_ab1 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 -select - md5(cast(concat(coalesce(cast(_airbyte_nested_strea__nto_long_names_hashid as char), ''), '-', coalesce(cast(double_array_data as char), ''), '-', coalesce(cast(`DATA` as char), '')) as char)) as _airbyte_partition_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 tmp --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 -select - _airbyte_nested_strea__nto_long_names_hashid, - double_array_data, - `DATA`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_partition_hashid -from __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` -where 1 = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql deleted file mode 100644 index e68283420cfdc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ /dev/null @@ -1,113 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co___names_partition_data__dbt_tmp` - as ( - -with __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` -with numbers as ( - - - - - with p as ( - select 0 as generated_number union all select 1 - ), unioned as ( - - select - - - p0.generated_number * power(2, 0) - - - + 1 - as generated_number - - from - - - p as p0 - - - - ) - - select * - from unioned - where generated_number <= 1 - order by generated_number - - - ), - joined as ( - select - _airbyte_partition_hashid as _airbyte_hashid, - - json_extract(`DATA`, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data - from test_normalization.`nested_stream_with_co___long_names_partition` - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.`DATA` - where numbers.generated_number <= json_length(`DATA`) - ) -select - _airbyte_partition_hashid, - json_value(_airbyte_nested_data, - '$."currency"' RETURNING CHAR) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from 
test_normalization.`nested_stream_with_co___long_names_partition` as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -left join joined on _airbyte_partition_hashid = joined._airbyte_hashid -where 1 = 1 -and `DATA` is not null - -), __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 -select - _airbyte_partition_hashid, - cast(currency as char(1024)) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 -select - md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(currency as char), '')) as char)) as _airbyte_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 tmp --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_data_hashid -from __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from test_normalization.`nested_stream_with_co___long_names_partition` -where 1 = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql deleted file mode 100644 index 4b276edcc316f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ /dev/null @@ -1,113 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co__ion_double_array_data__dbt_tmp` - as ( - -with __dbt__cte__nested_stream_with_co_3double_array_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` -with numbers as ( - - - - - with p as ( - select 0 as generated_number union all select 1 - ), unioned as ( - - select - - - p0.generated_number * power(2, 0) - - - + 1 - as generated_number - - from - - - p as p0 - - - - ) - - select * - from unioned - where generated_number <= 2 - order by generated_number - - - ), - joined as ( - select - _airbyte_partition_hashid as _airbyte_hashid, - - json_extract(double_array_data, concat("$[", numbers.generated_number - 1, "][0]")) as 
_airbyte_nested_data - from test_normalization.`nested_stream_with_co___long_names_partition` - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.double_array_data - where numbers.generated_number <= json_length(double_array_data) - ) -select - _airbyte_partition_hashid, - json_value(_airbyte_nested_data, - '$."id"' RETURNING CHAR) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization.`nested_stream_with_co___long_names_partition` as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -left join joined on _airbyte_partition_hashid = joined._airbyte_hashid -where 1 = 1 -and double_array_data is not null - -), __dbt__cte__nested_stream_with_co_3double_array_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab1 -select - _airbyte_partition_hashid, - cast(id as char(1024)) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_co_3double_array_data_ab1 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__nested_stream_with_co_3double_array_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab2 -select - md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(id as char), '')) as char)) as _airbyte_double_array_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_co_3double_array_data_ab2 tmp --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab3 -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from __dbt__cte__nested_stream_with_co_3double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from test_normalization.`nested_stream_with_co___long_names_partition` -where 1 = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql deleted file mode 100644 index 8be6ef88d622a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ /dev/null @@ -1,25 +0,0 @@ - - - create table - test_normalization.`nested_stream_with_co__lting_into_long_names__dbt_tmp` - as ( - --- Final base SQL model --- depends_on: 
test_normalization.`nested_stream_with_co_1g_into_long_names_scd` -select - _airbyte_unique_key, - id, - `date`, - `partition`, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_nested_strea__nto_long_names_hashid -from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` --- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml deleted file mode 100755 index bc7fa6d501663..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: table - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -vars: - dbt_utils_dispatch_list: - - airbyte_utils - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate - exchange_rate: test_normalization._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: 
test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx - 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number - multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization._airbyte_raw_types_testing - types_testing_ab2: test_normalization._airbyte_raw_types_testing - types_testing_stg: test_normalization._airbyte_raw_types_testing - types_testing_scd: test_normalization._airbyte_raw_types_testing - types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 59d722cb4f381..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,84 +0,0 @@ - - - create table - test_normalization.`dedup_exchange_rate_scd__dbt_tmp` - as ( - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.`dedup_exchange_rate_stg` - -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - `date` as _airbyte_start_at, - lag(`date`) over ( - partition by id, currency, cast(nzd as char) - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as char) - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate 
a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index d6ab488f2f636..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,30 +0,0 @@ - - - create table - test_normalization.`dedup_exchange_rate__dbt_tmp` - as ( - --- Final base SQL model --- depends_on: test_normalization.`dedup_exchange_rate_scd` -select - _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from test_normalization.`dedup_exchange_rate_scd` --- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 540fc0e7911f6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,123 +0,0 @@ - - - create table - test_normalization.`exchange_rate__dbt_tmp` - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_exchange_rate -select - json_value(_airbyte_data, - '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, - '$."currency"' RETURNING CHAR) as currency, - json_value(_airbyte_data, - '$."date"' RETURNING CHAR) as `date`, - json_value(_airbyte_data, - '$."timestamp_col"' RETURNING CHAR) as timestamp_col, - json_value(_airbyte_data, - '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, - 
json_value(_airbyte_data, - '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, - json_value(_airbyte_data, - '$."NZD"' RETURNING CHAR) as nzd, - json_value(_airbyte_data, - '$."USD"' RETURNING CHAR) as usd, - json_value(_airbyte_data, - '$."column___with__quotes"' RETURNING CHAR) as `column__'with"_quotes`, - json_value(_airbyte_data, - '$."datetime_tz"' RETURNING CHAR) as datetime_tz, - json_value(_airbyte_data, - '$."datetime_no_tz"' RETURNING CHAR) as datetime_no_tz, - json_value(_airbyte_data, - '$."time_tz"' RETURNING CHAR) as time_tz, - json_value(_airbyte_data, - '$."time_no_tz"' RETURNING CHAR) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - signed -) as id, - cast(currency as char(1024)) as currency, - case when `date` = '' then NULL - else cast(`date` as date) - end as `date` - , - cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, - cast(`HKD@spéçiäl & characters` as - float -) as `HKD@spéçiäl & characters`, - cast(hkd_special___characters as char(1024)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast(`column__'with"_quotes` as char(1024)) as `column__'with"_quotes`, - cast(nullif(datetime_tz, '') as char(1024)) as datetime_tz, - case when datetime_no_tz regexp '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.*' THEN STR_TO_DATE(SUBSTR(datetime_no_tz, 1, 19), '%Y-%m-%dT%H:%i:%S') - else cast(if(datetime_no_tz = '', NULL, datetime_no_tz) as datetime) - end as datetime_no_tz - , - nullif(cast(time_tz as char(1024)), "") as time_tz, - nullif(cast(time_no_tz as - time -), "") as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), '')) as char)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - `column__'with"_quotes`, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from test_normalization._airbyte_raw_exchange_rate -where 
1 = 1 - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 367544ad79b7b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,76 +0,0 @@ - - create view _airbyte_test_normalization.`dedup_exchange_rate_stg__dbt_tmp` as ( - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate -select - json_value(_airbyte_data, - '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, - '$."currency"' RETURNING CHAR) as currency, - json_value(_airbyte_data, - '$."date"' RETURNING CHAR) as `date`, - json_value(_airbyte_data, - '$."timestamp_col"' RETURNING CHAR) as timestamp_col, - json_value(_airbyte_data, - '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, - json_value(_airbyte_data, - '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, - json_value(_airbyte_data, - '$."NZD"' RETURNING CHAR) as nzd, - json_value(_airbyte_data, - '$."USD"' RETURNING CHAR) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - signed -) as id, - cast(currency as char(1024)) as currency, - case when `date` = '' then NULL - else cast(`date` as date) - end as `date` - , - cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, - cast(`HKD@spéçiäl & characters` as - float -) as `HKD@spéçiäl & characters`, - cast(hkd_special___characters as char(1024)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), '')) as char)) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ); \ No newline at end of file diff --git 
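The deleted MySQL fixtures above all repeat one deduplication idiom: an md5 hash over the '-'-joined primary-key columns as a surrogate _airbyte_unique_key, lag() and row_number() windows over the cursor column to derive _airbyte_end_at and _airbyte_active_row, and a final row_number() filter that keeps one row per (unique key, start, emitted-at) triple. A minimal, self-contained MySQL 8 sketch of that pattern follows; the events table and its columns are hypothetical stand-ins, not names from the fixtures.

-- Hypothetical input: events(id, currency, updated_at, emitted_at).
-- Mirrors the SCD Type 2 shape of the dedup_exchange_rate_scd fixture above.
with scd_data as (
    select
        md5(concat(coalesce(cast(id as char), ''), '-',
                   coalesce(cast(currency as char), ''))) as unique_key,
        id,
        currency,
        updated_at as start_at,
        lag(updated_at) over (
            partition by id, currency
            order by updated_at is null asc, updated_at desc, emitted_at desc
        ) as end_at,
        case when row_number() over (
            partition by id, currency
            order by updated_at is null asc, updated_at desc, emitted_at desc
        ) = 1 then 1 else 0 end as active_row,
        emitted_at
    from events
),
dedup_data as (
    -- keep exactly one row per version, as the fixtures do before merging
    select
        scd_data.*,
        row_number() over (
            partition by unique_key, start_at, emitted_at
            order by active_row desc
        ) as row_num
    from scd_data
)
select * from dedup_data where row_num = 1;

Ordering by "updated_at is null asc, updated_at desc" pushes null cursor values last, so the newest non-null cursor value wins the active-row flag.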
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index 1bd990b39925d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,72 +0,0 @@ - - create view _airbyte_test_normalization.`multiple_column_names_conflicts_stg__dbt_tmp` as ( - -with __dbt__cte__multiple_column_names_conflicts_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_multiple_column_names_conflicts -select - json_value(_airbyte_data, - '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, - '$."User Id"' RETURNING CHAR) as `User Id`, - json_value(_airbyte_data, - '$."user_id"' RETURNING CHAR) as user_id, - json_value(_airbyte_data, - '$."User id"' RETURNING CHAR) as `User id_1`, - json_value(_airbyte_data, - '$."user id"' RETURNING CHAR) as `user id_2`, - json_value(_airbyte_data, - '$."User@Id"' RETURNING CHAR) as `User@Id`, - json_value(_airbyte_data, - '$."UserId"' RETURNING CHAR) as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias --- multiple_column_names_conflicts -where 1 = 1 - -), __dbt__cte__multiple_column_names_conflicts_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 -select - cast(id as - signed -) as id, - cast(`User Id` as char(1024)) as `User Id`, - cast(user_id as - float -) as user_id, - cast(`User id_1` as - float -) as `User id_1`, - cast(`user id_2` as - float -) as `user id_2`, - cast(`User@Id` as char(1024)) as `User@Id`, - cast(userid as - float -) as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__multiple_column_names_conflicts_ab1 --- multiple_column_names_conflicts -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 -select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(`User Id` as char), ''), '-', coalesce(cast(user_id as char), ''), '-', coalesce(cast(`User id_1` as char), ''), '-', coalesce(cast(`user id_2` as char), ''), '-', coalesce(cast(`User@Id` as char), ''), '-', coalesce(cast(userid as char), '')) as char)) as _airbyte_multiple_col__ames_conflicts_hashid, - tmp.* -from __dbt__cte__multiple_column_names_conflicts_ab2 tmp --- multiple_column_names_conflicts -where 1 = 1 - - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 670db0869ae22..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 11466fe3ffd5e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_bigint() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}(1024)) as currency, - case when {{ adapter.quote('date') }} = '' then NULL - else cast({{ adapter.quote('date') }} as date) - end as {{ adapter.quote('date') }} - , - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(hkd_special___characters as {{ dbt_utils.type_string() }}(1024)) as hkd_special___characters, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, 
- cast(usd as {{ dbt_utils.type_float() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index b1c2af62e4bf1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,176 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - 
_airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index dd4432bd60a5e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 3fe3205727b89..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('column__\'with"_quotes') }}, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ 
source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 86ec2c9e8b1b7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'hkd_special___characters', - 'nzd', - 'usd', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index f51802427655e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_1_prefix_startwith_number - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_multiple_column_names_conflicts - - name: _airbyte_raw_pos_dedup_cdcx - - name: _airbyte_raw_renamed_dedup_cdc_excluded - - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 59d722cb4f381..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,84 +0,0 @@ - - - create table - test_normalization.`dedup_exchange_rate_scd__dbt_tmp` - as ( - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from _airbyte_test_normalization.`dedup_exchange_rate_stg` - -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) 
table for each record identified by their primary key - select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - `date` as _airbyte_start_at, - lag(`date`) over ( - partition by id, currency, cast(nzd as char) - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as char) - order by - `date` is null asc, - `date` desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index d6ab488f2f636..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,30 +0,0 @@ - - - create table - test_normalization.`dedup_exchange_rate__dbt_tmp` - as ( - --- Final base SQL model --- depends_on: test_normalization.`dedup_exchange_rate_scd` -select - _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from test_normalization.`dedup_exchange_rate_scd` --- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 
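Each SCD fixture above is paired with a final model that projects only the current version of every record by filtering on _airbyte_active_row = 1, exactly as in the dedup_exchange_rate model just shown. A short sketch of that consumer-facing step, again with hypothetical table and column names:

-- Hypothetical "current state" view over an SCD table shaped like the fixtures above.
create view exchange_rate_latest as
select
    unique_key,
    id,
    currency,
    start_at,
    emitted_at
from exchange_rate_scd
where active_row = 1;

Because the active-row flag is recomputed on every normalization run, such a view always returns the newest version per primary key; rows that became inactive are cleaned out of the final table separately, by the post_hook delete shown earlier.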
100644 index 540fc0e7911f6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,123 +0,0 @@ - - - create table - test_normalization.`exchange_rate__dbt_tmp` - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_exchange_rate -select - json_value(_airbyte_data, - '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, - '$."currency"' RETURNING CHAR) as currency, - json_value(_airbyte_data, - '$."date"' RETURNING CHAR) as `date`, - json_value(_airbyte_data, - '$."timestamp_col"' RETURNING CHAR) as timestamp_col, - json_value(_airbyte_data, - '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, - json_value(_airbyte_data, - '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, - json_value(_airbyte_data, - '$."NZD"' RETURNING CHAR) as nzd, - json_value(_airbyte_data, - '$."USD"' RETURNING CHAR) as usd, - json_value(_airbyte_data, - '$."column___with__quotes"' RETURNING CHAR) as `column__'with"_quotes`, - json_value(_airbyte_data, - '$."datetime_tz"' RETURNING CHAR) as datetime_tz, - json_value(_airbyte_data, - '$."datetime_no_tz"' RETURNING CHAR) as datetime_no_tz, - json_value(_airbyte_data, - '$."time_tz"' RETURNING CHAR) as time_tz, - json_value(_airbyte_data, - '$."time_no_tz"' RETURNING CHAR) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - signed -) as id, - cast(currency as char(1024)) as currency, - case when `date` = '' then NULL - else cast(`date` as date) - end as `date` - , - cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, - cast(`HKD@spéçiäl & characters` as - float -) as `HKD@spéçiäl & characters`, - cast(hkd_special___characters as char(1024)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast(`column__'with"_quotes` as char(1024)) as `column__'with"_quotes`, - cast(nullif(datetime_tz, '') as char(1024)) as datetime_tz, - case when datetime_no_tz regexp '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.*' THEN STR_TO_DATE(SUBSTR(datetime_no_tz, 1, 19), '%Y-%m-%dT%H:%i:%S') - else cast(if(datetime_no_tz = '', NULL, datetime_no_tz) as datetime) - end as datetime_no_tz - , - nullif(cast(time_tz as char(1024)), "") as time_tz, - nullif(cast(time_no_tz as - time -), "") as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', 
coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), '')) as char)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, - `column__'with"_quotes`, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 367544ad79b7b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,76 +0,0 @@ - - create view _airbyte_test_normalization.`dedup_exchange_rate_stg__dbt_tmp` as ( - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate -select - json_value(_airbyte_data, - '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, - '$."currency"' RETURNING CHAR) as currency, - json_value(_airbyte_data, - '$."date"' RETURNING CHAR) as `date`, - json_value(_airbyte_data, - '$."timestamp_col"' RETURNING CHAR) as timestamp_col, - json_value(_airbyte_data, - '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, - json_value(_airbyte_data, - '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, - json_value(_airbyte_data, - '$."NZD"' RETURNING CHAR) as nzd, - json_value(_airbyte_data, - '$."USD"' RETURNING CHAR) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - signed -) as id, - cast(currency as char(1024)) as currency, - case when `date` = '' then NULL - else cast(`date` as date) - end as `date` - , - cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, - cast(`HKD@spéçiäl & characters` as - float -) as `HKD@spéçiäl & characters`, - cast(hkd_special___characters as char(1024)) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - 
_airbyte_emitted_at, - - CURRENT_TIMESTAMP - as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), '')) as char)) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml deleted file mode 100755 index e8b2b254e10be..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -source-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -data-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -modules-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: false - schema: false - identifier: false -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: table - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -vars: - dbt_utils_dispatch_list: - - airbyte_utils - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization.airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization.airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization.airbyte_raw_exchange_rate - exchange_rate: test_normalization.airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization.airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization.airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization.airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization.airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization.airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization.airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: 
test_normalization.airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization.airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization.airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization.airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: test_normalization.airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: test_normalization.airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization.airbyte_raw_pos_dedup_cdcx - ab_1_prefix_startwith_number_ab1: test_normalization.airbyte_raw_1_prefix_startwith_number - ab_1_prefix_startwith_number_ab2: test_normalization.airbyte_raw_1_prefix_startwith_number - ab_1_prefix_startwith_number_stg: test_normalization.airbyte_raw_1_prefix_startwith_number - ab_1_prefix_startwith_number_scd: test_normalization.airbyte_raw_1_prefix_startwith_number - ab_1_prefix_startwith_number: test_normalization.airbyte_raw_1_prefix_startwith_number - multiple_column_names_conflicts_ab1: test_normalization.airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: test_normalization.airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization.airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization.airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization.airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization.airbyte_raw_types_testing - types_testing_ab2: test_normalization.airbyte_raw_types_testing - types_testing_stg: test_normalization.airbyte_raw_types_testing - types_testing_scd: test_normalization.airbyte_raw_types_testing - types_testing: test_normalization.airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index cfd186b006ae3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,101 +0,0 @@ - - - create table test_normalization.dedup_exchange_rate_scd__dbt_tmp - - as - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from test_normalization.dedup_exchange_rate_stg - -- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - ora_hash( - - id || '~' || - - - currency || '~' || - - - nzd - - ) as "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - "DATE" as "_AIRBYTE_START_AT", - lag("DATE") over ( - partition by id, currency, cast(nzd as varchar2(4000)) - order by - "DATE" desc nulls last, - "_AIRBYTE_EMITTED_AT" desc - ) as "_AIRBYTE_END_AT", - case when row_number() over ( - partition by id, currency, cast(nzd as varchar2(4000)) - order by - "DATE" desc nulls last, - "_AIRBYTE_EMITTED_AT" desc - ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", - "_AIRBYTE_AB_ID", - 
"_AIRBYTE_EMITTED_AT", - "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - "_AIRBYTE_UNIQUE_KEY", - "_AIRBYTE_START_AT", - "_AIRBYTE_EMITTED_AT" - order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" - ) as "_AIRBYTE_ROW_NUM", - ora_hash( - - "_AIRBYTE_UNIQUE_KEY" || '~' || - - - "_AIRBYTE_START_AT" || '~' || - - - "_AIRBYTE_EMITTED_AT" - - ) as "_AIRBYTE_UNIQUE_KEY_SCD", - scd_data.* - from scd_data -) -select - "_AIRBYTE_UNIQUE_KEY", - "_AIRBYTE_UNIQUE_KEY_SCD", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - "_AIRBYTE_START_AT", - "_AIRBYTE_END_AT", - "_AIRBYTE_ACTIVE_ROW", - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT", - "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" -from dedup_data where "_AIRBYTE_ROW_NUM" = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index e8d34d5c48dfc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,28 +0,0 @@ - - - create table test_normalization.dedup_exchange_rate__dbt_tmp - - as - --- Final base SQL model --- depends_on: test_normalization.dedup_exchange_rate_scd -select - "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT", - "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" -from test_normalization.dedup_exchange_rate_scd --- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate -where 1 = 1 -and "_AIRBYTE_ACTIVE_ROW" = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 4292befa848b8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,153 +0,0 @@ - - - create table test_normalization.exchange_rate__dbt_tmp - - as - -with dbt__cte__exchange_rate_ab1__ as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.airbyte_raw_exchange_rate -select - json_value("_AIRBYTE_DATA", '$."id"') as id, - json_value("_AIRBYTE_DATA", '$."currency"') as currency, - json_value("_AIRBYTE_DATA", '$."date"') as "DATE", - json_value("_AIRBYTE_DATA", 
'$."timestamp_col"') as timestamp_col, - json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, - json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, - json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, - json_value("_AIRBYTE_DATA", '$."USD"') as usd, - json_value("_AIRBYTE_DATA", '$."column___with__quotes"') as column___with__quotes, - json_value("_AIRBYTE_DATA", '$."datetime_tz"') as datetime_tz, - json_value("_AIRBYTE_DATA", '$."datetime_no_tz"') as datetime_no_tz, - json_value("_AIRBYTE_DATA", '$."time_tz"') as time_tz, - json_value("_AIRBYTE_DATA", '$."time_no_tz"') as time_no_tz, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from test_normalization.airbyte_raw_exchange_rate --- exchange_rate -where 1 = 1 -), dbt__cte__exchange_rate_ab2__ as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: dbt__cte__exchange_rate_ab1__ -select - cast(id as - numeric -) as id, - cast(currency as varchar2(4000)) as currency, - cast(nullif("DATE", '') as - varchar2(4000) -) as "DATE", - cast(nullif(timestamp_col, '') as - varchar2(4000) -) as timestamp_col, - cast(hkd_special___characters as - float -) as hkd_special___characters, - cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast(column___with__quotes as varchar2(4000)) as column___with__quotes, - cast(nullif(datetime_tz, '') as - varchar2(4000) -) as datetime_tz, - cast(nullif(datetime_no_tz, '') as - varchar2(4000) -) as datetime_no_tz, - cast(nullif(time_tz, '') as - varchar2(4000) -) as time_tz, - cast(nullif(time_no_tz, '') as - varchar2(4000) -) as time_no_tz, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from dbt__cte__exchange_rate_ab1__ --- exchange_rate -where 1 = 1 -), dbt__cte__exchange_rate_ab3__ as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: dbt__cte__exchange_rate_ab2__ -select - ora_hash( - - id || '~' || - - - currency || '~' || - - - "DATE" || '~' || - - - timestamp_col || '~' || - - - hkd_special___characters || '~' || - - - hkd_special___characters_1 || '~' || - - - nzd || '~' || - - - usd || '~' || - - - column___with__quotes || '~' || - - - datetime_tz || '~' || - - - datetime_no_tz || '~' || - - - time_tz || '~' || - - - time_no_tz - - ) as "_AIRBYTE_EXCHANGE_RATE_HASHID", - tmp.* -from dbt__cte__exchange_rate_ab2__ tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: dbt__cte__exchange_rate_ab3__ -select - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - column___with__quotes, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT", - "_AIRBYTE_EXCHANGE_RATE_HASHID" -from dbt__cte__exchange_rate_ab3__ --- exchange_rate from test_normalization.airbyte_raw_exchange_rate -where 1 = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql 
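On Oracle, the same extraction-and-hash steps swap MySQL's md5(concat(...)) for ora_hash over a '~'-delimited concatenation, extract fields with json_value, and refer to the Airbyte metadata columns as quoted upper-case identifiers, as the fixtures above show. A compact sketch of that Oracle idiom, in which raw_events and its columns are hypothetical:

-- Hypothetical Oracle staging view: json_value extraction plus an ora_hash row id.
create view events_stg as
select
    ora_hash(id || '~' || currency) as "_ROW_HASHID",
    tmp.*
from (
    select
        json_value("_DATA", '$."id"')       as id,
        json_value("_DATA", '$."currency"') as currency,
        "_EMITTED_AT"
    from raw_events
) tmp;

ora_hash returns a 32-bit hash bucket rather than a cryptographic digest, so it is cheaper but more collision-prone than md5.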
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index e1ad3ce68244f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,94 +0,0 @@ - - create view test_normalization.dedup_exchange_rate_stg__dbt_tmp as - -with dbt__cte__dedup_exchange_rate_ab1__ as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.airbyte_raw_dedup_exchange_rate -select - json_value("_AIRBYTE_DATA", '$."id"') as id, - json_value("_AIRBYTE_DATA", '$."currency"') as currency, - json_value("_AIRBYTE_DATA", '$."date"') as "DATE", - json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, - json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, - json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, - json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, - json_value("_AIRBYTE_DATA", '$."USD"') as usd, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from test_normalization.airbyte_raw_dedup_exchange_rate --- dedup_exchange_rate -where 1 = 1 - -), dbt__cte__dedup_exchange_rate_ab2__ as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: dbt__cte__dedup_exchange_rate_ab1__ -select - cast(id as - numeric -) as id, - cast(currency as varchar2(4000)) as currency, - cast(nullif("DATE", '') as - varchar2(4000) -) as "DATE", - cast(nullif(timestamp_col, '') as - varchar2(4000) -) as timestamp_col, - cast(hkd_special___characters as - float -) as hkd_special___characters, - cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from dbt__cte__dedup_exchange_rate_ab1__ --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: dbt__cte__dedup_exchange_rate_ab2__ -select - ora_hash( - - id || '~' || - - - currency || '~' || - - - "DATE" || '~' || - - - timestamp_col || '~' || - - - hkd_special___characters || '~' || - - - hkd_special___characters_1 || '~' || - - - nzd || '~' || - - - usd - - ) as "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID", - tmp.* -from dbt__cte__dedup_exchange_rate_ab2__ tmp --- dedup_exchange_rate -where 1 = 1 - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index 5c34c11584562..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,87 +0,0 @@ - - 
create view test_normalization.multiple_column_names_conflicts_stg__dbt_tmp as - -with dbt__cte__multiple_column_names_conflicts_ab1__ as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.airbyte_raw_multiple_column_names_conflicts -select - json_value("_AIRBYTE_DATA", '$."id"') as id, - json_value("_AIRBYTE_DATA", '$."User Id"') as user_id, - json_value("_AIRBYTE_DATA", '$."user_id"') as user_id_1, - json_value("_AIRBYTE_DATA", '$."User id"') as user_id_2, - json_value("_AIRBYTE_DATA", '$."user id"') as user_id_3, - json_value("_AIRBYTE_DATA", '$."User@Id"') as user_id_4, - json_value("_AIRBYTE_DATA", '$."UserId"') as userid, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from test_normalization.airbyte_raw_multiple_column_names_conflicts --- multiple_column_names_conflicts -where 1 = 1 - -), dbt__cte__multiple_column_names_conflicts_ab2__ as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: dbt__cte__multiple_column_names_conflicts_ab1__ -select - cast(id as - numeric -) as id, - cast(user_id as varchar2(4000)) as user_id, - cast(user_id_1 as - float -) as user_id_1, - cast(user_id_2 as - float -) as user_id_2, - cast(user_id_3 as - float -) as user_id_3, - cast(user_id_4 as varchar2(4000)) as user_id_4, - cast(userid as - float -) as userid, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from dbt__cte__multiple_column_names_conflicts_ab1__ --- multiple_column_names_conflicts -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: dbt__cte__multiple_column_names_conflicts_ab2__ -select - ora_hash( - - id || '~' || - - - user_id || '~' || - - - user_id_1 || '~' || - - - user_id_2 || '~' || - - - user_id_3 || '~' || - - - user_id_4 || '~' || - - - userid - - ) as "_AIRBYTE_MULTIPLE_COLUMN_NAMES_CONFLICTS_HASHID", - tmp.* -from dbt__cte__multiple_column_names_conflicts_ab2__ tmp --- multiple_column_names_conflicts -where 1 = 1 - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index f6b2863d9c445..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = quote('_AIRBYTE_AB_ID'), - schema = "test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['id'], ['id']) }} as id, - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['date'], ['date']) }} as {{ quote('DATE') }}, - {{ 
json_extract_scalar(quote('_AIRBYTE_DATA'), ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as hkd_special___characters, - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters_1, - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['USD'], ['USD']) }} as usd, - {{ quote('_AIRBYTE_AB_ID') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }}, - {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }} -from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index f3158bc2e9193..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = quote('_AIRBYTE_AB_ID'), - schema = "test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_bigint() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast({{ empty_string_to_null(quote('DATE')) }} as {{ type_date() }}) as {{ quote('DATE') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast(hkd_special___characters as {{ dbt_utils.type_float() }}) as hkd_special___characters, - cast(hkd_special___characters_1 as {{ dbt_utils.type_string() }}) as hkd_special___characters_1, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - {{ quote('_AIRBYTE_AB_ID') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }}, - {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }} -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 9320dbc51f60f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,174 +0,0 @@ -{{ config( - unique_key = "{{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}", - schema = 
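The ab1/ab2 models above are the templated form of the compiled views earlier in this diff: macros such as json_extract_scalar, quote, empty_string_to_null and current_timestamp are adapter-dispatched, so the same model renders to JSON_VALUE on Oracle and to jsonb_extract_path_text on Postgres. A simplified, hypothetical sketch of such a dispatching macro (the real macros in base-normalization also handle nested paths, arrays, and per-adapter quoting):

    {% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
      {{ adapter.dispatch('json_extract_scalar')(json_column, json_path_list, normalized_json_path) }}
    {%- endmacro %}

    {% macro oracle__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
      {# a single-level path renders to Oracle's JSON_VALUE #}
      json_value({{ json_column }}, '$."{{ json_path_list[0] }}"')
    {%- endmacro %}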
"test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and quote('_AIRBYTE_UNIQUE_KEY') in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( - select recent_records.unique_key - from ( - select distinct {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key - from {{ this }} - where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as active_count - from {{ this }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view test_normalization.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} -), -new_data_ids as ( - -- build a subset of {{ quote('_AIRBYTE_UNIQUE_KEY') }} from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = new_data_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data.{{ quote('_AIRBYTE_AB_ID') }} = inc_data.{{ quote('_AIRBYTE_AB_ID') }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - {{ quote('DATE') }} as {{ quote('_AIRBYTE_START_AT') }}, - lag({{ quote('DATE') }}) over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ quote('DATE') }} desc nulls last, - {{ quote('_AIRBYTE_EMITTED_AT') }} desc - ) as {{ quote('_AIRBYTE_END_AT') }}, - case when row_number() over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ quote('DATE') }} desc nulls last, - {{ quote('_AIRBYTE_EMITTED_AT') }} desc - ) = 1 then 1 else 0 end as {{ quote('_AIRBYTE_ACTIVE_ROW') }}, - {{ quote('_AIRBYTE_AB_ID') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }}, - {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }} - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update 
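The post_hook above is the "delete records which are no longer active" step: collect the unique keys whose SCD rows were re-normalized recently, left-join them against the keys that still have an active row, and delete the ones with no active match. Stripped of Jinja, the shape is a plain anti-join; in this sketch final_table and scd_table are placeholders, and the :watermark bind stands in for the incremental_clause macro:

    delete from final_table where _airbyte_unique_key in (
      select recent.unique_key
      from (
        -- keys whose SCD rows were (re)normalized since the last run
        select distinct _airbyte_unique_key as unique_key
        from scd_table
        where _airbyte_normalized_at >= :watermark
      ) recent
      left join (
        -- keys that still have an active row in the SCD table
        select _airbyte_unique_key as unique_key,
               count(_airbyte_unique_key) as active_count
        from scd_table
        where _airbyte_active_row = 1
        group by _airbyte_unique_key
      ) active
        on recent.unique_key = active.unique_key
      where active.active_count is null or active.active_count = 0
    )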
queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - {{ quote('_AIRBYTE_UNIQUE_KEY') }}, - {{ quote('_AIRBYTE_START_AT') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }} - order by {{ quote('_AIRBYTE_ACTIVE_ROW') }} desc, {{ quote('_AIRBYTE_AB_ID') }} - ) as {{ quote('_AIRBYTE_ROW_NUM') }}, - {{ dbt_utils.surrogate_key([ - quote('_AIRBYTE_UNIQUE_KEY'), - quote('_AIRBYTE_START_AT'), - quote('_AIRBYTE_EMITTED_AT') - ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}, - scd_data.* - from scd_data -) -select - {{ quote('_AIRBYTE_UNIQUE_KEY') }}, - {{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - {{ quote('_AIRBYTE_START_AT') }}, - {{ quote('_AIRBYTE_END_AT') }}, - {{ quote('_AIRBYTE_ACTIVE_ROW') }}, - {{ quote('_AIRBYTE_AB_ID') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }}, - {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, - {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }} -from dedup_data where {{ quote('_AIRBYTE_ROW_NUM') }} = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 316e400418353..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ config( - unique_key = "{{ quote('_AIRBYTE_UNIQUE_KEY') }}", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - {{ quote('_AIRBYTE_UNIQUE_KEY') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - {{ quote('_AIRBYTE_AB_ID') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }}, - {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, - {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }} -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 2fa5061764670..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - unique_key = quote('_AIRBYTE_AB_ID'), - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, 
- {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - column___with__quotes, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - {{ quote('_AIRBYTE_AB_ID') }}, - {{ quote('_AIRBYTE_EMITTED_AT') }}, - {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, - {{ quote('_AIRBYTE_EXCHANGE_RATE_HASHID') }} -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', 'airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 15c9c07d71e9a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = quote('_AIRBYTE_AB_ID'), - schema = "test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - quote('DATE'), - 'timestamp_col', - 'hkd_special___characters', - 'hkd_special___characters_1', - 'nzd', - 'usd', - ]) }} as {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }}, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index 6fc61e6c97e1f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: airbyte_raw_1_prefix_startwith_number - - name: airbyte_raw_dedup_cdc_excluded - - name: airbyte_raw_dedup_exchange_rate - - name: airbyte_raw_exchange_rate - - name: airbyte_raw_multiple_column_names_conflicts - - name: airbyte_raw_pos_dedup_cdcx - - name: airbyte_raw_renamed_dedup_cdc_excluded - - name: airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index cfd186b006ae3..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,101 +0,0 @@ - - - create table test_normalization.dedup_exchange_rate_scd__dbt_tmp - - as - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from test_normalization.dedup_exchange_rate_stg - -- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - ora_hash( - - id || '~' || - - - currency || '~' || - - - nzd - - ) as "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - "DATE" as "_AIRBYTE_START_AT", - lag("DATE") over ( - partition by id, currency, cast(nzd as varchar2(4000)) - order by - "DATE" desc nulls last, - "_AIRBYTE_EMITTED_AT" desc - ) as "_AIRBYTE_END_AT", - case when row_number() over ( - partition by id, currency, cast(nzd as varchar2(4000)) - order by - "DATE" desc nulls last, - "_AIRBYTE_EMITTED_AT" desc - ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - "_AIRBYTE_UNIQUE_KEY", - "_AIRBYTE_START_AT", - "_AIRBYTE_EMITTED_AT" - order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" - ) as "_AIRBYTE_ROW_NUM", - ora_hash( - - "_AIRBYTE_UNIQUE_KEY" || '~' || - - - "_AIRBYTE_START_AT" || '~' || - - - "_AIRBYTE_EMITTED_AT" - - ) as "_AIRBYTE_UNIQUE_KEY_SCD", - scd_data.* - from scd_data -) -select - "_AIRBYTE_UNIQUE_KEY", - "_AIRBYTE_UNIQUE_KEY_SCD", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - "_AIRBYTE_START_AT", - "_AIRBYTE_END_AT", - "_AIRBYTE_ACTIVE_ROW", - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT", - "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" -from dedup_data where "_AIRBYTE_ROW_NUM" = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index e8d34d5c48dfc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,28 +0,0 @@ - - - create table test_normalization.dedup_exchange_rate__dbt_tmp - - as - --- Final base SQL model --- depends_on: test_normalization.dedup_exchange_rate_scd -select - "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT", - "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" -from 
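The compiled SCD model above reduces to two window functions over the primary-key partition. Because the ordering is descending, lag() reads the next newer version of the same key, so its cursor value becomes this row's end date, and row_number() = 1 marks the single active row. A compact sketch, assuming a hypothetical table versions with one row per (id, "DATE") version:

    select
      id,
      "DATE" as start_at,
      -- with a descending order, lag() looks at the next *newer* version,
      -- so its date is the moment this version stopped being current
      lag("DATE") over (
        partition by id
        order by "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc
      ) as end_at,
      -- the newest version of each key is the one active row
      case when row_number() over (
        partition by id
        order by "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc
      ) = 1 then 1 else 0 end as active_row
    from versions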
test_normalization.dedup_exchange_rate_scd --- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate -where 1 = 1 -and "_AIRBYTE_ACTIVE_ROW" = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 4292befa848b8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,153 +0,0 @@ - - - create table test_normalization.exchange_rate__dbt_tmp - - as - -with dbt__cte__exchange_rate_ab1__ as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.airbyte_raw_exchange_rate -select - json_value("_AIRBYTE_DATA", '$."id"') as id, - json_value("_AIRBYTE_DATA", '$."currency"') as currency, - json_value("_AIRBYTE_DATA", '$."date"') as "DATE", - json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, - json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, - json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, - json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, - json_value("_AIRBYTE_DATA", '$."USD"') as usd, - json_value("_AIRBYTE_DATA", '$."column___with__quotes"') as column___with__quotes, - json_value("_AIRBYTE_DATA", '$."datetime_tz"') as datetime_tz, - json_value("_AIRBYTE_DATA", '$."datetime_no_tz"') as datetime_no_tz, - json_value("_AIRBYTE_DATA", '$."time_tz"') as time_tz, - json_value("_AIRBYTE_DATA", '$."time_no_tz"') as time_no_tz, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from test_normalization.airbyte_raw_exchange_rate --- exchange_rate -where 1 = 1 -), dbt__cte__exchange_rate_ab2__ as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: dbt__cte__exchange_rate_ab1__ -select - cast(id as - numeric -) as id, - cast(currency as varchar2(4000)) as currency, - cast(nullif("DATE", '') as - varchar2(4000) -) as "DATE", - cast(nullif(timestamp_col, '') as - varchar2(4000) -) as timestamp_col, - cast(hkd_special___characters as - float -) as hkd_special___characters, - cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast(column___with__quotes as varchar2(4000)) as column___with__quotes, - cast(nullif(datetime_tz, '') as - varchar2(4000) -) as datetime_tz, - cast(nullif(datetime_no_tz, '') as - varchar2(4000) -) as datetime_no_tz, - cast(nullif(time_tz, '') as - varchar2(4000) -) as time_tz, - cast(nullif(time_no_tz, '') as - varchar2(4000) -) as time_no_tz, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from dbt__cte__exchange_rate_ab1__ --- exchange_rate -where 1 = 1 -), dbt__cte__exchange_rate_ab3__ as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: dbt__cte__exchange_rate_ab2__ -select - ora_hash( - - id || '~' || - - - currency || '~' || - - - "DATE" || 
'~' || - - - timestamp_col || '~' || - - - hkd_special___characters || '~' || - - - hkd_special___characters_1 || '~' || - - - nzd || '~' || - - - usd || '~' || - - - column___with__quotes || '~' || - - - datetime_tz || '~' || - - - datetime_no_tz || '~' || - - - time_tz || '~' || - - - time_no_tz - - ) as "_AIRBYTE_EXCHANGE_RATE_HASHID", - tmp.* -from dbt__cte__exchange_rate_ab2__ tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: dbt__cte__exchange_rate_ab3__ -select - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, - column___with__quotes, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT", - "_AIRBYTE_EXCHANGE_RATE_HASHID" -from dbt__cte__exchange_rate_ab3__ --- exchange_rate from test_normalization.airbyte_raw_exchange_rate -where 1 = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index e1ad3ce68244f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,94 +0,0 @@ - - create view test_normalization.dedup_exchange_rate_stg__dbt_tmp as - -with dbt__cte__dedup_exchange_rate_ab1__ as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: test_normalization.airbyte_raw_dedup_exchange_rate -select - json_value("_AIRBYTE_DATA", '$."id"') as id, - json_value("_AIRBYTE_DATA", '$."currency"') as currency, - json_value("_AIRBYTE_DATA", '$."date"') as "DATE", - json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, - json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, - json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, - json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, - json_value("_AIRBYTE_DATA", '$."USD"') as usd, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from test_normalization.airbyte_raw_dedup_exchange_rate --- dedup_exchange_rate -where 1 = 1 - -), dbt__cte__dedup_exchange_rate_ab2__ as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: dbt__cte__dedup_exchange_rate_ab1__ -select - cast(id as - numeric -) as id, - cast(currency as varchar2(4000)) as currency, - cast(nullif("DATE", '') as - varchar2(4000) -) as "DATE", - cast(nullif(timestamp_col, '') as - varchar2(4000) -) as timestamp_col, - cast(hkd_special___characters as - float -) as hkd_special___characters, - cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - "_AIRBYTE_AB_ID", - "_AIRBYTE_EMITTED_AT", - - CURRENT_TIMESTAMP - as "_AIRBYTE_NORMALIZED_AT" -from dbt__cte__dedup_exchange_rate_ab1__ --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column 
based on the values of this record --- depends_on: dbt__cte__dedup_exchange_rate_ab2__ -select - ora_hash( - - id || '~' || - - - currency || '~' || - - - "DATE" || '~' || - - - timestamp_col || '~' || - - - hkd_special___characters || '~' || - - - hkd_special___characters_1 || '~' || - - - nzd || '~' || - - - usd - - ) as "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID", - tmp.* -from dbt__cte__dedup_exchange_rate_ab2__ tmp --- dedup_exchange_rate -where 1 = 1 - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml deleted file mode 100755 index 6199d0a669d13..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml +++ /dev/null @@ -1,125 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - nested_stream_with_c__lting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__lting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__lting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__lting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__lting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - non_nested_stream_wi__lting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_wi__lting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_wi__lting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_wi__lting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd: 
test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty - simple_stream_with_n__lting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_n__lting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_n__lting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_n__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar - conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array - conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array - unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias: test_normalization._airbyte_raw_unnest_alias - arrays_ab1: test_normalization._airbyte_raw_arrays - arrays_ab2: test_normalization._airbyte_raw_arrays - arrays_ab3: test_normalization._airbyte_raw_arrays - arrays: test_normalization._airbyte_raw_arrays - nested_stream_with_c___long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children: test_normalization._airbyte_raw_unnest_alias - arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays - 
arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays - arrays_nested_array_parent: test_normalization._airbyte_raw_arrays - nested_stream_with_c__ion_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__ion_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__ion_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__ion_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___names_partition_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___names_partition_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___names_partition_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c___names_partition_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name___conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name___conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name___conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name - conflict_stream_name___conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name - unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias - unnest_alias_childre__column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias - unnest_alias_childre__column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias - unnest_alias_childre__column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias - unnest_alias_childre__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql deleted file mode 100644 index 150407b1fbdf5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ /dev/null @@ -1,73 +0,0 @@ - - - - create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" - as ( - --- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') -with - -input_data as ( - select * - from 
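The dbt_project.yml above wires every generated model back to the raw table that feeds it through the models_to_source var, alongside the quoting and materialization defaults for each model folder. A minimal, hypothetical illustration of reading that map from a macro (the real base-normalization macros consume it differently):

    {% macro source_table_for(model_name) -%}
      {{ var('models_to_source')[model_name] }}
    {%- endmacro %}

    {# e.g. source_table_for('arrays_ab1') renders
       test_normalization._airbyte_raw_arrays #}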
"postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" - -- nested_stream_with_c__lting_into_long_names from "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - "date", - "partition", - "date" as _airbyte_start_at, - lag("date") over ( - partition by "id" - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_stre__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - "date", - "partition", - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_nested_stre__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql deleted file mode 100644 index 885ba6546326a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - - create table "postgres".test_normalization."some_stream_that_was_empty_scd" - as ( - --- depends_on: ref('some_stream_that_was_empty_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" - -- some_stream_that_was_empty from "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - "date", - "date" as _airbyte_start_at, - lag("date") over ( - partition by "id" - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) 
= 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_some_stream_that_was_empty_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - "date", - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_some_stream_that_was_empty_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql deleted file mode 100644 index c2170eeb4df25..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql +++ /dev/null @@ -1,64 +0,0 @@ - - - - create table "postgres".test_normalization."nested_stream_with_c___long_names_partition" - as ( - -with __dbt__cte__nested_stream_with_c___long_names_partition_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" -select - _airbyte_nested_stre__nto_long_names_hashid, - jsonb_extract_path("partition", 'double_array_data') as double_array_data, - jsonb_extract_path("partition", 'DATA') as "DATA", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and "partition" is not null - -), __dbt__cte__nested_stream_with_c___long_names_partition_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab1 -select - _airbyte_nested_stre__nto_long_names_hashid, - double_array_data, - "DATA", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_c___long_names_partition_ab1 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__nested_stream_with_c___long_names_partition_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab2 -select - 
md5(cast(coalesce(cast(_airbyte_nested_stre__nto_long_names_hashid as text), '') || '-' || coalesce(cast(double_array_data as text), '') || '-' || coalesce(cast("DATA" as text), '') as text)) as _airbyte_partition_hashid, - tmp.* -from __dbt__cte__nested_stream_with_c___long_names_partition_ab2 tmp --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab3 -select - _airbyte_nested_stre__nto_long_names_hashid, - double_array_data, - "DATA", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_partition_hashid -from __dbt__cte__nested_stream_with_c___long_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql deleted file mode 100644 index 36a8a151153a7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql +++ /dev/null @@ -1,67 +0,0 @@ - - - - create table "postgres".test_normalization."nested_stream_with_c___names_partition_data" - as ( - -with __dbt__cte__nested_stream_with_c___names_partition_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."nested_stream_with_c___long_names_partition" - -select - _airbyte_partition_hashid, - jsonb_extract_path_text(_airbyte_nested_data, 'currency') as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."nested_stream_with_c___long_names_partition" as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -cross join jsonb_array_elements( - case jsonb_typeof("DATA") - when 'array' then "DATA" - else '[]' end - ) as _airbyte_nested_data -where 1 = 1 -and "DATA" is not null - -), __dbt__cte__nested_stream_with_c___names_partition_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab1 -select - _airbyte_partition_hashid, - cast(currency as text) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_c___names_partition_data_ab1 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__nested_stream_with_c___names_partition_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab2 -select - md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') 
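The partition models above unnest JSON arrays on Postgres with jsonb_array_elements, guarded by jsonb_typeof so that a scalar or object in the column produces zero rows instead of a runtime error. The same pattern in isolation, assuming a hypothetical parent table nested_partition:

    select
      parent._airbyte_partition_hashid,
      jsonb_extract_path_text(elem, 'currency') as currency
    from nested_partition as parent
    cross join jsonb_array_elements(
      -- non-array values are replaced by an empty array, which
      -- unnests to zero rows rather than raising an error
      case jsonb_typeof(parent."DATA")
        when 'array' then parent."DATA"
        else '[]'::jsonb
      end
    ) as elem
    where parent."DATA" is not null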
|| '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_c___names_partition_data_ab2 tmp --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab3 -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_data_hashid -from __dbt__cte__nested_stream_with_c___names_partition_data_ab3 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "postgres".test_normalization."nested_stream_with_c___long_names_partition" -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql deleted file mode 100644 index 4b6ec78084879..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql +++ /dev/null @@ -1,67 +0,0 @@ - - - - create table "postgres".test_normalization."nested_stream_with_c__ion_double_array_data" - as ( - -with __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."nested_stream_with_c___long_names_partition" - -select - _airbyte_partition_hashid, - jsonb_extract_path_text(_airbyte_nested_data, 'id') as "id", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."nested_stream_with_c___long_names_partition" as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -cross join jsonb_array_elements( - case jsonb_typeof(double_array_data) - when 'array' then double_array_data - else '[]' end - ) as _airbyte_nested_data -where 1 = 1 -and double_array_data is not null - -), __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 -select - _airbyte_partition_hashid, - cast("id" as text) as "id", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__nested_stream_with_c__ion_double_array_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 -select - md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast("id" as text), '') as text)) as _airbyte_double_array_data_hashid, - tmp.* -from 
__dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 tmp --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab3 -select - _airbyte_partition_hashid, - "id", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from __dbt__cte__nested_stream_with_c__ion_double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "postgres".test_normalization."nested_stream_with_c___long_names_partition" -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql deleted file mode 100644 index a713c3b75e2f0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - - create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" -select - _airbyte_unique_key, - "id", - "date", - "partition", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_nested_stre__nto_long_names_hashid -from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" --- nested_stream_with_c__lting_into_long_names from "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql deleted file mode 100644 index 9062ea955a071..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql +++ /dev/null @@ -1,51 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" - as ( - -with __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -select - jsonb_extract_path_text(_airbyte_data, 'id') as 
"id", - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - - jsonb_extract_path(table_alias._airbyte_data, 'partition') - as "partition", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names as table_alias --- nested_stream_with_c__lting_into_long_names -where 1 = 1 - -), __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 -select - cast("id" as text) as "id", - cast("date" as text) as "date", - cast("partition" as - jsonb -) as "partition", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 --- nested_stream_with_c__lting_into_long_names -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast("partition" as text), '') as text)) as _airbyte_nested_stre__nto_long_names_hashid, - tmp.* -from __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 tmp --- nested_stream_with_c__lting_into_long_names -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql deleted file mode 100644 index 1556a86262084..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql +++ /dev/null @@ -1,23 +0,0 @@ - - - - create table "postgres".test_normalization."some_stream_that_was_empty" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."some_stream_that_was_empty_scd" -select - _airbyte_unique_key, - "id", - "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_some_stream_that_was_empty_hashid -from "postgres".test_normalization."some_stream_that_was_empty_scd" --- some_stream_that_was_empty from "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql deleted file mode 100644 index e473519de41aa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql +++ /dev/null 
@@ -1,45 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" - as ( - -with __dbt__cte__some_stream_that_was_empty_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty as table_alias --- some_stream_that_was_empty -where 1 = 1 - -), __dbt__cte__some_stream_that_was_empty_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__some_stream_that_was_empty_ab1 -select - cast("id" as text) as "id", - cast("date" as text) as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__some_stream_that_was_empty_ab1 --- some_stream_that_was_empty -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__some_stream_that_was_empty_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_some_stream_that_was_empty_hashid, - tmp.* -from __dbt__cte__some_stream_that_was_empty_ab2 tmp --- some_stream_that_was_empty -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql deleted file mode 100644 index aea94f43825c1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql +++ /dev/null @@ -1,60 +0,0 @@ - - - - create table "postgres".test_normalization_namespace."simple_stream_with_n__lting_into_long_names" - as ( - -with __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names as table_alias --- simple_stream_with_n__lting_into_long_names -where 1 = 1 - -), __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 -select - cast("id" as text) as "id", - cast("date" as text) as 
"date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 --- simple_stream_with_n__lting_into_long_names -where 1 = 1 - -), __dbt__cte__simple_stream_with_n__lting_into_long_names_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_simple_stre__nto_long_names_hashid, - tmp.* -from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 tmp --- simple_stream_with_n__lting_into_long_names -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab3 -select - "id", - "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_simple_stre__nto_long_names_hashid -from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab3 --- simple_stream_with_n__lting_into_long_names from "postgres".test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays.sql deleted file mode 100644 index e10c4619e53a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays.sql +++ /dev/null @@ -1,58 +0,0 @@ - - - create table "postgres".test_normalization."arrays__dbt_tmp" - as ( - -with __dbt__cte__arrays_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_arrays -select - jsonb_extract_path(_airbyte_data, 'array_of_strings') as array_of_strings, - - jsonb_extract_path(table_alias._airbyte_data, 'nested_array_parent') - as nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_arrays as table_alias --- arrays -where 1 = 1 -), __dbt__cte__arrays_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__arrays_ab1 -select - array_of_strings, - cast(nested_array_parent as - jsonb -) as nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__arrays_ab1 --- arrays -where 1 = 1 -), __dbt__cte__arrays_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__arrays_ab2 -select - md5(cast(coalesce(cast(array_of_strings as text), '') || '-' || coalesce(cast(nested_array_parent as text), '') as text)) as _airbyte_arrays_hashid, - tmp.* -from __dbt__cte__arrays_ab2 tmp --- arrays -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__arrays_ab3 -select - array_of_strings, - nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_arrays_hashid -from 
__dbt__cte__arrays_ab3 --- arrays from "postgres".test_normalization._airbyte_raw_arrays -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql deleted file mode 100644 index 09ad8fe3cd3f9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql +++ /dev/null @@ -1,55 +0,0 @@ - - - create table "postgres".test_normalization."arrays_nested_array_parent__dbt_tmp" - as ( - -with __dbt__cte__arrays_nested_array_parent_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."arrays" -select - _airbyte_arrays_hashid, - jsonb_extract_path(nested_array_parent, 'nested_array') as nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."arrays" as table_alias --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -and nested_array_parent is not null -), __dbt__cte__arrays_nested_array_parent_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__arrays_nested_array_parent_ab1 -select - _airbyte_arrays_hashid, - nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__arrays_nested_array_parent_ab1 --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -), __dbt__cte__arrays_nested_array_parent_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__arrays_nested_array_parent_ab2 -select - md5(cast(coalesce(cast(_airbyte_arrays_hashid as text), '') || '-' || coalesce(cast(nested_array as text), '') as text)) as _airbyte_nested_array_parent_hashid, - tmp.* -from __dbt__cte__arrays_nested_array_parent_ab2 tmp --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__arrays_nested_array_parent_ab3 -select - _airbyte_arrays_hashid, - nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_nested_array_parent_hashid -from __dbt__cte__arrays_nested_array_parent_ab3 --- nested_array_parent at arrays/nested_array_parent from "postgres".test_normalization."arrays" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql deleted file mode 100644 index c1c6ab12a7b7c..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql +++ /dev/null @@ -1,54 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_array__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_array_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_array -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path(_airbyte_data, 'conflict_stream_array') as conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_conflict_stream_array as table_alias --- conflict_stream_array -where 1 = 1 -), __dbt__cte__conflict_stream_array_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_array_ab1 -select - cast("id" as text) as "id", - conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_array_ab1 --- conflict_stream_array -where 1 = 1 -), __dbt__cte__conflict_stream_array_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_array_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_array as text), '') as text)) as _airbyte_conflict_stream_array_hashid, - tmp.* -from __dbt__cte__conflict_stream_array_ab2 tmp --- conflict_stream_array -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_array_ab3 -select - "id", - conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_array_hashid -from __dbt__cte__conflict_stream_array_ab3 --- conflict_stream_array from "postgres".test_normalization._airbyte_raw_conflict_stream_array -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql deleted file mode 100644 index ac5cffb8d00d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql +++ /dev/null @@ -1,58 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_name__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_name_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_name -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - - jsonb_extract_path(table_alias._airbyte_data, 'conflict_stream_name') - as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_conflict_stream_name 
as table_alias --- conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_name_ab1 -select - cast("id" as text) as "id", - cast(conflict_stream_name as - jsonb -) as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_name_ab1 --- conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_name_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_hashid, - tmp.* -from __dbt__cte__conflict_stream_name_ab2 tmp --- conflict_stream_name -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_name_ab3 -select - "id", - conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_name_hashid -from __dbt__cte__conflict_stream_name_ab3 --- conflict_stream_name from "postgres".test_normalization._airbyte_raw_conflict_stream_name -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql deleted file mode 100644 index 4aa2c420ed45d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql +++ /dev/null @@ -1,55 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_name___conflict_stream_name__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."conflict_stream_name_conflict_stream_name" -select - _airbyte_conflict_stream_name_2_hashid, - jsonb_extract_path_text(conflict_stream_name, 'groups') as groups, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" as table_alias --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name -where 1 = 1 -and conflict_stream_name is not null -), __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 -select - _airbyte_conflict_stream_name_2_hashid, - cast(groups as text) as groups, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name -where 1 = 1 -), 
__dbt__cte__conflict_stream_name___conflict_stream_name_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 -select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as text), '') || '-' || coalesce(cast(groups as text), '') as text)) as _airbyte_conflict_stream_name_3_hashid, - tmp.* -from __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 tmp --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 -select - _airbyte_conflict_stream_name_2_hashid, - groups, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_name_3_hashid -from __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql deleted file mode 100644 index 82dfb023674e5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql +++ /dev/null @@ -1,59 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_name_conflict_stream_name__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."conflict_stream_name" -select - _airbyte_conflict_stream_name_hashid, - - jsonb_extract_path(table_alias.conflict_stream_name, 'conflict_stream_name') - as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."conflict_stream_name" as table_alias --- conflict_stream_name at conflict_stream_name/conflict_stream_name -where 1 = 1 -and conflict_stream_name is not null -), __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 -select - _airbyte_conflict_stream_name_hashid, - cast(conflict_stream_name as - jsonb -) as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 --- conflict_stream_name at conflict_stream_name/conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 -select - 
md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_2_hashid, - tmp.* -from __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 tmp --- conflict_stream_name at conflict_stream_name/conflict_stream_name -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 -select - _airbyte_conflict_stream_name_hashid, - conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_name_2_hashid -from __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 --- conflict_stream_name at conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql deleted file mode 100644 index 09a4fa01de977..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql +++ /dev/null @@ -1,56 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_scalar__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_scalar_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_scalar -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'conflict_stream_scalar') as conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar as table_alias --- conflict_stream_scalar -where 1 = 1 -), __dbt__cte__conflict_stream_scalar_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_scalar_ab1 -select - cast("id" as text) as "id", - cast(conflict_stream_scalar as - bigint -) as conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_scalar_ab1 --- conflict_stream_scalar -where 1 = 1 -), __dbt__cte__conflict_stream_scalar_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_scalar_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_scalar as text), '') as text)) as _airbyte_conflict_stream_scalar_hashid, - tmp.* -from __dbt__cte__conflict_stream_scalar_ab2 tmp --- conflict_stream_scalar -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_scalar_ab3 -select - "id", - conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_scalar_hashid -from __dbt__cte__conflict_stream_scalar_ab3 --- conflict_stream_scalar from 
"postgres".test_normalization._airbyte_raw_conflict_stream_scalar -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql deleted file mode 100644 index 31d2176c3888c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql +++ /dev/null @@ -1,54 +0,0 @@ - - - create table "postgres".test_normalization."non_nested_stream_wi__lting_into_long_names__dbt_tmp" - as ( - -with __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names as table_alias --- non_nested_stream_wi__lting_into_long_names -where 1 = 1 -), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 -select - cast("id" as text) as "id", - cast("date" as text) as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 --- non_nested_stream_wi__lting_into_long_names -where 1 = 1 -), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_non_nested___nto_long_names_hashid, - tmp.* -from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 tmp --- non_nested_stream_wi__lting_into_long_names -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 -select - "id", - "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_non_nested___nto_long_names_hashid -from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 --- non_nested_stream_wi__lting_into_long_names from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql deleted file mode 100644 index 7af2f04f81f87..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql +++ /dev/null @@ -1,56 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_unnest_alias -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path(_airbyte_data, 'children') as children, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_unnest_alias as table_alias --- unnest_alias -where 1 = 1 -), __dbt__cte__unnest_alias_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_ab1 -select - cast("id" as - bigint -) as "id", - children, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_ab1 --- unnest_alias -where 1 = 1 -), __dbt__cte__unnest_alias_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(children as text), '') as text)) as _airbyte_unnest_alias_hashid, - tmp.* -from __dbt__cte__unnest_alias_ab2 tmp --- unnest_alias -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_ab3 -select - "id", - children, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_unnest_alias_hashid -from __dbt__cte__unnest_alias_ab3 --- unnest_alias from "postgres".test_normalization._airbyte_raw_unnest_alias -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql deleted file mode 100644 index 6688069a62f01..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql +++ /dev/null @@ -1,61 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias_childre__column___with__quotes__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."unnest_alias_children_owner" - -select - _airbyte_owner_hashid, - jsonb_extract_path_text(_airbyte_nested_data, 'currency') as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from 
"postgres".test_normalization."unnest_alias_children_owner" as table_alias --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -cross join jsonb_array_elements( - case jsonb_typeof("column`_'with""_quotes") - when 'array' then "column`_'with""_quotes" - else '[]' end - ) as _airbyte_nested_data -where 1 = 1 -and "column`_'with""_quotes" is not null -), __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 -select - _airbyte_owner_hashid, - cast(currency as text) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -where 1 = 1 -), __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 -select - md5(cast(coalesce(cast(_airbyte_owner_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_column___with__quotes_hashid, - tmp.* -from __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 tmp --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 -select - _airbyte_owner_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_column___with__quotes_hashid -from __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes from "postgres".test_normalization."unnest_alias_children_owner" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql deleted file mode 100644 index 779394d5765dc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql +++ /dev/null @@ -1,70 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias_children__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_children_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."unnest_alias" - -select - _airbyte_unnest_alias_hashid, - jsonb_extract_path_text(_airbyte_nested_data, 'ab_id') as ab_id, - - jsonb_extract_path(_airbyte_nested_data, 'owner') - as "owner", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."unnest_alias" as table_alias --- children at unnest_alias/children -cross join jsonb_array_elements( - case jsonb_typeof(children) - when 'array' then children - else '[]' end - ) as _airbyte_nested_data -where 1 = 1 -and children is not null 
-), __dbt__cte__unnest_alias_children_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_children_ab1 -select - _airbyte_unnest_alias_hashid, - cast(ab_id as - bigint -) as ab_id, - cast("owner" as - jsonb -) as "owner", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_children_ab1 --- children at unnest_alias/children -where 1 = 1 -), __dbt__cte__unnest_alias_children_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_children_ab2 -select - md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as text), '') || '-' || coalesce(cast(ab_id as text), '') || '-' || coalesce(cast("owner" as text), '') as text)) as _airbyte_children_hashid, - tmp.* -from __dbt__cte__unnest_alias_children_ab2 tmp --- children at unnest_alias/children -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_children_ab3 -select - _airbyte_unnest_alias_hashid, - ab_id, - "owner", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_children_hashid -from __dbt__cte__unnest_alias_children_ab3 --- children at unnest_alias/children from "postgres".test_normalization."unnest_alias" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql deleted file mode 100644 index 651e1c11914eb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql +++ /dev/null @@ -1,60 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias_children_owner__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_children_owner_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."unnest_alias_children" -select - _airbyte_children_hashid, - jsonb_extract_path_text("owner", 'owner_id') as owner_id, - jsonb_extract_path("owner", 'column`_''with"_quotes') as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."unnest_alias_children" as table_alias --- owner at unnest_alias/children/owner -where 1 = 1 -and "owner" is not null -), __dbt__cte__unnest_alias_children_owner_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_children_owner_ab1 -select - _airbyte_children_hashid, - cast(owner_id as - bigint -) as owner_id, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_children_owner_ab1 --- owner at unnest_alias/children/owner -where 1 = 1 -), __dbt__cte__unnest_alias_children_owner_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_children_owner_ab2 
-select - md5(cast(coalesce(cast(_airbyte_children_hashid as text), '') || '-' || coalesce(cast(owner_id as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_owner_hashid, - tmp.* -from __dbt__cte__unnest_alias_children_owner_ab2 tmp --- owner at unnest_alias/children/owner -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_children_owner_ab3 -select - _airbyte_children_hashid, - owner_id, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_owner_hashid -from __dbt__cte__unnest_alias_children_owner_ab3 --- owner at unnest_alias/children/owner from "postgres".test_normalization."unnest_alias_children" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab1.sql deleted file mode 100644 index 6fbf79914b825..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab1.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_arrays') }} -select - {{ json_extract_string_array('_airbyte_data', ['array_of_strings'], ['array_of_strings']) }} as array_of_strings, - {{ json_extract('table_alias', '_airbyte_data', ['nested_array_parent'], ['nested_array_parent']) }} as nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_arrays') }} as table_alias --- arrays -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab2.sql deleted file mode 100644 index 97010a6648aa3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab2.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('arrays_ab1') }} -select - array_of_strings, - cast(nested_array_parent as {{ type_json() }}) as nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('arrays_ab1') }} --- arrays -where 1 = 1 - 
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab3.sql deleted file mode 100644 index c3c0afc5de7b3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab3.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('arrays_ab2') }} -select - {{ dbt_utils.surrogate_key([ - array_to_string('array_of_strings'), - object_to_string('nested_array_parent'), - ]) }} as _airbyte_arrays_hashid, - tmp.* -from {{ ref('arrays_ab2') }} tmp --- arrays -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab1.sql deleted file mode 100644 index 2cbe78b134dc7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab1.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('arrays') }} -select - _airbyte_arrays_hashid, - {{ json_extract_string_array('nested_array_parent', ['nested_array'], ['nested_array']) }} as nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('arrays') }} as table_alias --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -and nested_array_parent is not null - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab2.sql deleted file mode 100644 index 0a2dde68d0b85..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab2.sql +++ /dev/null @@ -1,17 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to cast each column to its 
adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('arrays_nested_array_parent_ab1') }} -select - _airbyte_arrays_hashid, - nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('arrays_nested_array_parent_ab1') }} --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab3.sql deleted file mode 100644 index c59efa0e9ad20..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab3.sql +++ /dev/null @@ -1,17 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('arrays_nested_array_parent_ab2') }} -select - {{ dbt_utils.surrogate_key([ - '_airbyte_arrays_hashid', - array_to_string('nested_array'), - ]) }} as _airbyte_nested_array_parent_hashid, - tmp.* -from {{ ref('arrays_nested_array_parent_ab2') }} tmp --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab1.sql deleted file mode 100644 index 611e84ed967b1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab1.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_conflict_stream_array') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_array('_airbyte_data', ['conflict_stream_array'], ['conflict_stream_array']) }} as conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_conflict_stream_array') }} as table_alias --- conflict_stream_array -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab2.sql
deleted file mode 100644
index 2193fab3931cd..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('conflict_stream_array_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- conflict_stream_array,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_array_ab1') }}
--- conflict_stream_array
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab3.sql
deleted file mode 100644
index e70e5cf665517..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab3.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('conflict_stream_array_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- adapter.quote('id'),
- array_to_string('conflict_stream_array'),
- ]) }} as _airbyte_conflict_stream_array_hashid,
- tmp.*
-from {{ ref('conflict_stream_array_ab2') }} tmp
--- conflict_stream_array
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab1.sql
deleted file mode 100644
index 87c51e6de1793..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab1.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ ref('conflict_stream_name_conflict_stream_name') }}
-select
- _airbyte_conflict_stream_name_2_hashid,
- {{ json_extract_scalar('conflict_stream_name', ['groups'], ['groups']) }} as groups,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_name_conflict_stream_name') }} as table_alias
--- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name
-where 1 = 1
-and conflict_stream_name is not null
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab2.sql
deleted file mode 100644
index 06ff95b10ff85..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab2.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('conflict_stream_name___conflict_stream_name_ab1') }}
-select
- _airbyte_conflict_stream_name_2_hashid,
- cast(groups as {{ dbt_utils.type_string() }}) as groups,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_name___conflict_stream_name_ab1') }}
--- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab3.sql
deleted file mode 100644
index 09e0262357c90..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab3.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('conflict_stream_name___conflict_stream_name_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- '_airbyte_conflict_stream_name_2_hashid',
- 'groups',
- ]) }} as _airbyte_conflict_stream_name_3_hashid,
- tmp.*
-from {{ ref('conflict_stream_name___conflict_stream_name_ab2') }} tmp
--- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab1.sql
deleted file mode 100644
index 158c5358a3559..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab1.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ source('test_normalization', '_airbyte_raw_conflict_stream_name') }}
-select
- {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
- {{ json_extract('table_alias', '_airbyte_data', ['conflict_stream_name'], ['conflict_stream_name']) }} as conflict_stream_name,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ source('test_normalization', '_airbyte_raw_conflict_stream_name') }} as table_alias
--- conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql
deleted file mode 100644
index c2d58329204cf..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('conflict_stream_name_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- cast(conflict_stream_name as {{ type_json() }}) as conflict_stream_name,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_name_ab1') }}
--- conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql
deleted file mode 100644
index 78f7cfe9bea5e..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('conflict_stream_name_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- adapter.quote('id'),
- object_to_string('conflict_stream_name'),
- ]) }} as _airbyte_conflict_stream_name_hashid,
- tmp.*
-from {{ ref('conflict_stream_name_ab2') }} tmp
--- conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab1.sql
deleted file mode 100644
index fcee51f386031..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab1.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ ref('conflict_stream_name') }}
-select
- _airbyte_conflict_stream_name_hashid,
- {{ json_extract('table_alias', 'conflict_stream_name', ['conflict_stream_name'], ['conflict_stream_name']) }} as conflict_stream_name,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_name') }} as table_alias
--- conflict_stream_name at conflict_stream_name/conflict_stream_name
-where 1 = 1
-and conflict_stream_name is not null
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab2.sql
deleted file mode 100644
index e097773611da6..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab2.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('conflict_stream_name_conflict_stream_name_ab1') }}
-select
- _airbyte_conflict_stream_name_hashid,
- cast(conflict_stream_name as {{ type_json() }}) as conflict_stream_name,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_name_conflict_stream_name_ab1') }}
--- conflict_stream_name at conflict_stream_name/conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql
deleted file mode 100644
index 0892d61432767..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('conflict_stream_name_conflict_stream_name_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- '_airbyte_conflict_stream_name_hashid',
- object_to_string('conflict_stream_name'),
- ]) }} as _airbyte_conflict_stream_name_2_hashid,
- tmp.*
-from {{ ref('conflict_stream_name_conflict_stream_name_ab2') }} tmp
--- conflict_stream_name at conflict_stream_name/conflict_stream_name
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab1.sql
deleted file mode 100644
index 473ada08d890f..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab1.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ source('test_normalization', '_airbyte_raw_conflict_stream_scalar') }}
-select
- {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
- {{ json_extract_scalar('_airbyte_data', ['conflict_stream_scalar'], ['conflict_stream_scalar']) }} as conflict_stream_scalar,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ source('test_normalization', '_airbyte_raw_conflict_stream_scalar') }} as table_alias
--- conflict_stream_scalar
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab2.sql
deleted file mode 100644
index 2f307fd526ecc..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('conflict_stream_scalar_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- cast(conflict_stream_scalar as {{ dbt_utils.type_bigint() }}) as conflict_stream_scalar,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('conflict_stream_scalar_ab1') }}
--- conflict_stream_scalar
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab3.sql
deleted file mode 100644
index c2fa037be1c00..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab3.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('conflict_stream_scalar_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- adapter.quote('id'),
- 'conflict_stream_scalar',
- ]) }} as _airbyte_conflict_stream_scalar_hashid,
- tmp.*
-from {{ ref('conflict_stream_scalar_ab2') }} tmp
--- conflict_stream_scalar
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql
deleted file mode 100644
index fafabe2d98407..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_scd') }}
-select
- _airbyte_nested_stre__nto_long_names_hashid,
- {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data,
- {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} as table_alias
--- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
-where 1 = 1
-and {{ adapter.quote('partition') }} is not null
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql
deleted file mode 100644
index a622952dbeff9..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('nested_stream_with_c___long_names_partition_ab1') }}
-select
- _airbyte_nested_stre__nto_long_names_hashid,
- double_array_data,
- {{ adapter.quote('DATA') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c___long_names_partition_ab1') }}
--- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql
deleted file mode 100644
index 3eb1b81838277..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('nested_stream_with_c___long_names_partition_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- '_airbyte_nested_stre__nto_long_names_hashid',
- array_to_string('double_array_data'),
- array_to_string(adapter.quote('DATA')),
- ]) }} as _airbyte_partition_hashid,
- tmp.*
-from {{ ref('nested_stream_with_c___long_names_partition_ab2') }} tmp
--- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql
deleted file mode 100644
index 0aab8469aefd2..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ ref('nested_stream_with_c___long_names_partition') }}
-{{ unnest_cte(ref('nested_stream_with_c___long_names_partition'), 'partition', adapter.quote('DATA')) }}
-select
- _airbyte_partition_hashid,
- {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c___long_names_partition') }} as table_alias
--- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
-{{ cross_join_unnest('partition', adapter.quote('DATA')) }}
-where 1 = 1
-and {{ adapter.quote('DATA') }} is not null
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql
deleted file mode 100644
index f6cb35f7d406b..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('nested_stream_with_c___names_partition_data_ab1') }}
-select
- _airbyte_partition_hashid,
- cast(currency as {{ dbt_utils.type_string() }}) as currency,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c___names_partition_data_ab1') }}
--- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql
deleted file mode 100644
index f06e21a1432e6..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('nested_stream_with_c___names_partition_data_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- '_airbyte_partition_hashid',
- 'currency',
- ]) }} as _airbyte_data_hashid,
- tmp.*
-from {{ ref('nested_stream_with_c___names_partition_data_ab2') }} tmp
--- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql
deleted file mode 100644
index 5f674cdcd1a69..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ ref('nested_stream_with_c___long_names_partition') }}
-{{ unnest_cte(ref('nested_stream_with_c___long_names_partition'), 'partition', 'double_array_data') }}
-select
- _airbyte_partition_hashid,
- {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as {{ adapter.quote('id') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c___long_names_partition') }} as table_alias
--- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
-{{ cross_join_unnest('partition', 'double_array_data') }}
-where 1 = 1
-and double_array_data is not null
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql
deleted file mode 100644
index 6d785589955da..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('nested_stream_with_c__ion_double_array_data_ab1') }}
-select
- _airbyte_partition_hashid,
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c__ion_double_array_data_ab1') }}
--- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql
deleted file mode 100644
index c83657e465f6f..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- schema = "_airbyte_test_normalization",
- tags = [ "nested-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('nested_stream_with_c__ion_double_array_data_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- '_airbyte_partition_hashid',
- adapter.quote('id'),
- ]) }} as _airbyte_double_array_data_hashid,
- tmp.*
-from {{ ref('nested_stream_with_c__ion_double_array_data_ab2') }} tmp
--- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql
deleted file mode 100644
index 767a1071f1745..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
-select
- {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
- {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
- {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias
--- nested_stream_with_c__lting_into_long_names
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql
deleted file mode 100644
index 6739cf914f383..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
- cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('nested_stream_with_c__lting_into_long_names_ab1') }}
--- nested_stream_with_c__lting_into_long_names
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab1.sql
deleted file mode 100644
index dfbf901b64ab4..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab1.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ source('test_normalization', '_airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names') }}
-select
- {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
- {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ source('test_normalization', '_airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names') }} as table_alias
--- non_nested_stream_wi__lting_into_long_names
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab2.sql
deleted file mode 100644
index 3488676ec99f7..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('non_nested_stream_wi__lting_into_long_names_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('non_nested_stream_wi__lting_into_long_names_ab1') }}
--- non_nested_stream_wi__lting_into_long_names
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab3.sql
deleted file mode 100644
index a673655e03ff7..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab3.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('non_nested_stream_wi__lting_into_long_names_ab2') }}
-select
- {{ dbt_utils.surrogate_key([
- adapter.quote('id'),
- adapter.quote('date'),
- ]) }} as _airbyte_non_nested___nto_long_names_hashid,
- tmp.*
-from {{ ref('non_nested_stream_wi__lting_into_long_names_ab2') }} tmp
--- non_nested_stream_wi__lting_into_long_names
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql
deleted file mode 100644
index 6862a6ac2688c..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }}
-select
- {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
- {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} as table_alias
--- some_stream_that_was_empty
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql
deleted file mode 100644
index 258f8b697b564..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('some_stream_that_was_empty_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
- cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('some_stream_that_was_empty_ab1') }}
--- some_stream_that_was_empty
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab1.sql
deleted file mode 100644
index 60085cd403242..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab1.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
--- depends_on: {{ source('test_normalization', '_airbyte_raw_unnest_alias') }}
-select
- {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
- {{ json_extract_array('_airbyte_data', ['children'], ['children']) }} as children,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ source('test_normalization', '_airbyte_raw_unnest_alias') }} as table_alias
--- unnest_alias
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab2.sql
deleted file mode 100644
index 3bffe697fa097..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab2.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
- indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
- unique_key = '_airbyte_ab_id',
- schema = "_airbyte_test_normalization",
- tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
--- depends_on: {{ ref('unnest_alias_ab1') }}
-select
- cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }},
- children,
- _airbyte_ab_id,
- _airbyte_emitted_at,
- {{ current_timestamp() }} as _airbyte_normalized_at
-from {{ ref('unnest_alias_ab1') }}
--- unnest_alias
-where 1 = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql deleted file mode 100644 index 36d29cbc26e67..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('unnest_alias_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - array_to_string('children'), - ]) }} as _airbyte_unnest_alias_hashid, - tmp.* -from {{ ref('unnest_alias_ab2') }} tmp --- unnest_alias -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab1.sql deleted file mode 100644 index 505c4699fcc39..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('unnest_alias_children_owner') }} -{{ unnest_cte(ref('unnest_alias_children_owner'), 'owner', adapter.quote('column`_\'with""_quotes')) }} -select - _airbyte_owner_hashid, - {{ json_extract_scalar(unnested_column_value(adapter.quote('column`_\'with""_quotes')), ['currency'], ['currency']) }} as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('unnest_alias_children_owner') }} as table_alias --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -{{ cross_join_unnest('owner', adapter.quote('column`_\'with""_quotes')) }} -where 1 = 1 -and {{ adapter.quote('column`_\'with""_quotes') }} is not null - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab2.sql deleted file mode 100644 index fe150b0ef18cd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab2.sql +++ /dev/null @@ -1,17 
+0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('unnest_alias_childre__column___with__quotes_ab1') }} -select - _airbyte_owner_hashid, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('unnest_alias_childre__column___with__quotes_ab1') }} --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab3.sql deleted file mode 100644 index 86b03f9708a90..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab3.sql +++ /dev/null @@ -1,17 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('unnest_alias_childre__column___with__quotes_ab2') }} -select - {{ dbt_utils.surrogate_key([ - '_airbyte_owner_hashid', - 'currency', - ]) }} as _airbyte_column___with__quotes_hashid, - tmp.* -from {{ ref('unnest_alias_childre__column___with__quotes_ab2') }} tmp --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab1.sql deleted file mode 100644 index 6ac97b369163b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('unnest_alias') }} -{{ unnest_cte(ref('unnest_alias'), 'unnest_alias', 'children') }} -select - _airbyte_unnest_alias_hashid, - {{ json_extract_scalar(unnested_column_value('children'), ['ab_id'], ['ab_id']) }} as ab_id, - {{ json_extract('', unnested_column_value('children'), ['owner'], ['owner']) }} as {{ adapter.quote('owner') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ 
ref('unnest_alias') }} as table_alias --- children at unnest_alias/children -{{ cross_join_unnest('unnest_alias', 'children') }} -where 1 = 1 -and children is not null - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab2.sql deleted file mode 100644 index aa7bd0d46c1de..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab2.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('unnest_alias_children_ab1') }} -select - _airbyte_unnest_alias_hashid, - cast(ab_id as {{ dbt_utils.type_bigint() }}) as ab_id, - cast({{ adapter.quote('owner') }} as {{ type_json() }}) as {{ adapter.quote('owner') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('unnest_alias_children_ab1') }} --- children at unnest_alias/children -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql deleted file mode 100644 index e5a3aa0268c54..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('unnest_alias_children_ab2') }} -select - {{ dbt_utils.surrogate_key([ - '_airbyte_unnest_alias_hashid', - 'ab_id', - object_to_string(adapter.quote('owner')), - ]) }} as _airbyte_children_hashid, - tmp.* -from {{ ref('unnest_alias_children_ab2') }} tmp --- children at unnest_alias/children -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab1.sql deleted file mode 100644 index 1fe7e748b55dc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab1.sql +++ /dev/null @@ -1,19 
+0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('unnest_alias_children') }} -select - _airbyte_children_hashid, - {{ json_extract_scalar(adapter.quote('owner'), ['owner_id'], ['owner_id']) }} as owner_id, - {{ json_extract_array(adapter.quote('owner'), ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('unnest_alias_children') }} as table_alias --- owner at unnest_alias/children/owner -where 1 = 1 -and {{ adapter.quote('owner') }} is not null - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab2.sql deleted file mode 100644 index d6a8942fa8c59..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab2.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('unnest_alias_children_owner_ab1') }} -select - _airbyte_children_hashid, - cast(owner_id as {{ dbt_utils.type_bigint() }}) as owner_id, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('unnest_alias_children_owner_ab1') }} --- owner at unnest_alias/children/owner -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab3.sql deleted file mode 100644 index 46eeb0375687b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab3.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "_airbyte_test_normalization", - tags = [ "nested-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('unnest_alias_children_owner_ab2') }} -select - {{ dbt_utils.surrogate_key([ - '_airbyte_children_hashid', - 'owner_id', - array_to_string(adapter.quote('column`_\'with""_quotes')), - ]) }} as _airbyte_owner_hashid, - tmp.* -from {{ ref('unnest_alias_children_owner_ab2') }} tmp --- owner at 
unnest_alias/children/owner -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql deleted file mode 100644 index b732876827659..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_namespace", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} as table_alias --- simple_stream_with_n__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql deleted file mode 100644 index a2f35bfcefb1c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_namespace", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('simple_stream_with_n__lting_into_long_names_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('simple_stream_with_n__lting_into_long_names_ab1') }} --- simple_stream_with_n__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql deleted file mode 100644 index 231ba585f7024..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_namespace", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('simple_stream_with_n__lting_into_long_names_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - adapter.quote('date'), - ]) }} as _airbyte_simple_stre__nto_long_names_hashid, - tmp.* -from {{ ref('simple_stream_with_n__lting_into_long_names_ab2') }} tmp --- simple_stream_with_n__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql deleted file mode 100644 index 5eaf6186aaab4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ /dev/null @@ -1,163 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_c__lting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} - -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('nested_stream_with_c__lting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id 
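-- note: empty_new_data never returns rows, so every inc_data column resolves to NULL here;
-- the join exists only so star_intersect can read inc_data's column types and reconcile
-- them with this_data's columns when the staging schema has changed between syncs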
= inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} - -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by {{ adapter.quote('id') }} - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_stre__nto_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_stre__nto_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql deleted file mode 100644 index c35233d432cb3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ /dev/null @@ -1,161 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - 
identifier='some_stream_that_was_empty' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('some_stream_that_was_empty_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('some_stream_that_was_empty_stg') }} - -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('some_stream_that_was_empty_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values 
(we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('some_stream_that_was_empty_stg') }} - -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by {{ adapter.quote('id') }} - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_some_stream_that_was_empty_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_some_stream_that_was_empty_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql deleted file mode 100644 index 92e9c5d4fe088..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_c___long_names_partition_ab3') }} -select - _airbyte_nested_stre__nto_long_names_hashid, - double_array_data, - {{ 
adapter.quote('DATA') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_partition_hashid -from {{ ref('nested_stream_with_c___long_names_partition_ab3') }} --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql deleted file mode 100644 index f453cd838e21f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_c___names_partition_data_ab3') }} -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_data_hashid -from {{ ref('nested_stream_with_c___names_partition_data_ab3') }} --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_c___long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql deleted file mode 100644 index ea7bc2e780956..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_c__ion_double_array_data_ab3') }} -select - _airbyte_partition_hashid, - {{ adapter.quote('id') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from {{ ref('nested_stream_with_c__ion_double_array_data_ab3') }} --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_c___long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql deleted file mode 100644 index 26c3aded7063d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} -select - _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_stre__nto_long_names_hashid -from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} --- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql deleted file mode 100644 index 8249fe95741a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - adapter.quote('date'), - object_to_string(adapter.quote('partition')), - ]) }} as _airbyte_nested_stre__nto_long_names_hashid, - tmp.* -from {{ ref('nested_stream_with_c__lting_into_long_names_ab2') }} tmp --- nested_stream_with_c__lting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql deleted file mode 100644 index 23bcd85bcf91c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('some_stream_that_was_empty_scd') }} -select - _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_some_stream_that_was_empty_hashid -from {{ ref('some_stream_that_was_empty_scd') }} --- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql deleted file mode 100644 index ca645527eca86..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('some_stream_that_was_empty_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - adapter.quote('date'), - ]) }} as _airbyte_some_stream_that_was_empty_hashid, - tmp.* -from {{ ref('some_stream_that_was_empty_ab2') }} tmp --- some_stream_that_was_empty -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql deleted file mode 100644 index 7f70fc83c6163..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = 
'_airbyte_ab_id', - schema = "test_normalization_namespace", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('simple_stream_with_n__lting_into_long_names_ab3') }} -select - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_simple_stre__nto_long_names_hashid -from {{ ref('simple_stream_with_n__lting_into_long_names_ab3') }} --- simple_stream_with_n__lting_into_long_names from {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql deleted file mode 100644 index 875d028168620..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('arrays_ab3') }} -select - array_of_strings, - nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_arrays_hashid -from {{ ref('arrays_ab3') }} --- arrays from {{ source('test_normalization', '_airbyte_raw_arrays') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql deleted file mode 100644 index 73f13e380ac25..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('arrays_nested_array_parent_ab3') }} -select - _airbyte_arrays_hashid, - nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_array_parent_hashid -from {{ ref('arrays_nested_array_parent_ab3') }} --- nested_array_parent at arrays/nested_array_parent from {{ ref('arrays') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql deleted file mode 100644 index ede71a891dc05..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('conflict_stream_array_ab3') }} -select - {{ adapter.quote('id') }}, - conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_conflict_stream_array_hashid -from {{ ref('conflict_stream_array_ab3') }} --- conflict_stream_array from {{ source('test_normalization', '_airbyte_raw_conflict_stream_array') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql deleted file mode 100644 index f203166febe17..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('conflict_stream_name_ab3') }} -select - {{ adapter.quote('id') }}, - conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_conflict_stream_name_hashid -from {{ ref('conflict_stream_name_ab3') }} --- conflict_stream_name from {{ source('test_normalization', '_airbyte_raw_conflict_stream_name') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql deleted file mode 100644 index 2c221c2940b75..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('conflict_stream_name___conflict_stream_name_ab3') }} -select - _airbyte_conflict_stream_name_2_hashid, - groups, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as 
_airbyte_normalized_at, - _airbyte_conflict_stream_name_3_hashid -from {{ ref('conflict_stream_name___conflict_stream_name_ab3') }} --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name from {{ ref('conflict_stream_name_conflict_stream_name') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql deleted file mode 100644 index 195d067ffe415..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('conflict_stream_name_conflict_stream_name_ab3') }} -select - _airbyte_conflict_stream_name_hashid, - conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_conflict_stream_name_2_hashid -from {{ ref('conflict_stream_name_conflict_stream_name_ab3') }} --- conflict_stream_name at conflict_stream_name/conflict_stream_name from {{ ref('conflict_stream_name') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql deleted file mode 100644 index 31f263905b533..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('conflict_stream_scalar_ab3') }} -select - {{ adapter.quote('id') }}, - conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_conflict_stream_scalar_hashid -from {{ ref('conflict_stream_scalar_ab3') }} --- conflict_stream_scalar from {{ source('test_normalization', '_airbyte_raw_conflict_stream_scalar') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql deleted file mode 100644 index 8b4cddcd4b179..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('non_nested_stream_wi__lting_into_long_names_ab3') }} -select - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_non_nested___nto_long_names_hashid -from {{ ref('non_nested_stream_wi__lting_into_long_names_ab3') }} --- non_nested_stream_wi__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql deleted file mode 100644 index 7c113e7291b5d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('unnest_alias_ab3') }} -select - {{ adapter.quote('id') }}, - children, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_unnest_alias_hashid -from {{ ref('unnest_alias_ab3') }} --- unnest_alias from {{ source('test_normalization', '_airbyte_raw_unnest_alias') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql deleted file mode 100644 index ae4165f58160f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql +++ /dev/null @@ -1,18 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('unnest_alias_childre__column___with__quotes_ab3') }} -select - _airbyte_owner_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_column___with__quotes_hashid -from {{ ref('unnest_alias_childre__column___with__quotes_ab3') }} --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes 
from {{ ref('unnest_alias_children_owner') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql deleted file mode 100644 index 9f98219880ec5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('unnest_alias_children_ab3') }} -select - _airbyte_unnest_alias_hashid, - ab_id, - {{ adapter.quote('owner') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_children_hashid -from {{ ref('unnest_alias_children_ab3') }} --- children at unnest_alias/children from {{ ref('unnest_alias') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql deleted file mode 100644 index 14c766c3dd59f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - schema = "test_normalization", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('unnest_alias_children_owner_ab3') }} -select - _airbyte_children_hashid, - owner_id, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_owner_hashid -from {{ ref('unnest_alias_children_owner_ab3') }} --- owner at unnest_alias/children/owner from {{ ref('unnest_alias_children') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/sources.yml deleted file mode 100644 index 29bae1b4b5105..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/sources.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_arrays - - name: _airbyte_raw_conflict_stream_array - - name: _airbyte_raw_conflict_stream_name - - name: _airbyte_raw_conflict_stream_scalar - - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - - 
name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - - name: _airbyte_raw_some_stream_that_was_empty - - name: _airbyte_raw_unnest_alias -- name: test_normalization_namespace - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql deleted file mode 100644 index 7026a868cc5d3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ); - - - insert into "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid" - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql deleted file mode 100644 index cb4ff47eeea78..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."some_stream_that_was_empty_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "some_stream_that_was_empty_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."some_stream_that_was_empty_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", 
"_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid" - from "some_stream_that_was_empty_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql deleted file mode 100644 index 3a98824ffdd13..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "postgres".test_normalization."nested_stream_with_c___long_names_partition" ("_airbyte_nested_stre__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") - ( - select "_airbyte_nested_stre__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" - from "nested_stream_with_c___long_names_partitio__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql deleted file mode 100644 index b90c9cb238e0b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "postgres".test_normalization."nested_stream_with_c___names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") - ( - select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" - from "nested_stream_with_c___names_partition_dat__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql deleted file mode 100644 index 98dfb2ba788b4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into 
"postgres".test_normalization."nested_stream_with_c__ion_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") - ( - select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" - from "nested_stream_with_c__ion_double_array_dat__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql deleted file mode 100644 index bf109e096b702..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ); - - - insert into "postgres".test_normalization."nested_stream_with_c__lting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid") - ( - select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid" - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql deleted file mode 100644 index 275461f97657f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" ("_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ) - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql deleted file mode 100644 index 97759325fe3df..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."some_stream_that_was_empty" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "some_stream_that_was_empty__dbt_tmp" - ); - - - insert into "postgres".test_normalization."some_stream_that_was_empty" ("_airbyte_unique_key", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid") - ( - select "_airbyte_unique_key", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid" - from "some_stream_that_was_empty__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql deleted file mode 100644 index a0aa7cb30dd64..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "some_stream_that_was_empty_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" ("_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "some_stream_that_was_empty_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql deleted file mode 100644 index b3397712e600e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - 
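-- note: the statements below are the generic shape dbt compiles for an incremental model
-- using the delete+insert strategy on Postgres: rows whose unique_key appears in the freshly
-- built __dbt_tmp relation are deleted from the target, then the tmp rows are inserted;
-- schematically (names illustrative, not taken from this diff):
--   delete from target where (unique_key) in (select (unique_key) from tmp);
--   insert into target (cols) (select cols from tmp);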
delete from "postgres".test_normalization_namespace."simple_stream_with_n__lting_into_long_names" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "simple_stream_with_n__lting_into_long_name__dbt_tmp" - ); - - - insert into "postgres".test_normalization_namespace."simple_stream_with_n__lting_into_long_names" ("id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_simple_stre__nto_long_names_hashid") - ( - select "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_simple_stre__nto_long_names_hashid" - from "simple_stream_with_n__lting_into_long_name__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays.sql deleted file mode 100644 index e10c4619e53a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays.sql +++ /dev/null @@ -1,58 +0,0 @@ - - - create table "postgres".test_normalization."arrays__dbt_tmp" - as ( - -with __dbt__cte__arrays_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_arrays -select - jsonb_extract_path(_airbyte_data, 'array_of_strings') as array_of_strings, - - jsonb_extract_path(table_alias._airbyte_data, 'nested_array_parent') - as nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_arrays as table_alias --- arrays -where 1 = 1 -), __dbt__cte__arrays_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__arrays_ab1 -select - array_of_strings, - cast(nested_array_parent as - jsonb -) as nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__arrays_ab1 --- arrays -where 1 = 1 -), __dbt__cte__arrays_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__arrays_ab2 -select - md5(cast(coalesce(cast(array_of_strings as text), '') || '-' || coalesce(cast(nested_array_parent as text), '') as text)) as _airbyte_arrays_hashid, - tmp.* -from __dbt__cte__arrays_ab2 tmp --- arrays -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__arrays_ab3 -select - array_of_strings, - nested_array_parent, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_arrays_hashid -from __dbt__cte__arrays_ab3 --- arrays from "postgres".test_normalization._airbyte_raw_arrays -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql deleted file mode 100644 
index 09ad8fe3cd3f9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql +++ /dev/null @@ -1,55 +0,0 @@ - - - create table "postgres".test_normalization."arrays_nested_array_parent__dbt_tmp" - as ( - -with __dbt__cte__arrays_nested_array_parent_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."arrays" -select - _airbyte_arrays_hashid, - jsonb_extract_path(nested_array_parent, 'nested_array') as nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."arrays" as table_alias --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -and nested_array_parent is not null -), __dbt__cte__arrays_nested_array_parent_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__arrays_nested_array_parent_ab1 -select - _airbyte_arrays_hashid, - nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__arrays_nested_array_parent_ab1 --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -), __dbt__cte__arrays_nested_array_parent_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__arrays_nested_array_parent_ab2 -select - md5(cast(coalesce(cast(_airbyte_arrays_hashid as text), '') || '-' || coalesce(cast(nested_array as text), '') as text)) as _airbyte_nested_array_parent_hashid, - tmp.* -from __dbt__cte__arrays_nested_array_parent_ab2 tmp --- nested_array_parent at arrays/nested_array_parent -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__arrays_nested_array_parent_ab3 -select - _airbyte_arrays_hashid, - nested_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_nested_array_parent_hashid -from __dbt__cte__arrays_nested_array_parent_ab3 --- nested_array_parent at arrays/nested_array_parent from "postgres".test_normalization."arrays" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql deleted file mode 100644 index c1c6ab12a7b7c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql +++ /dev/null @@ -1,54 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_array__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_array_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_array -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path(_airbyte_data, 'conflict_stream_array') as conflict_stream_array, - _airbyte_ab_id, - 
_airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_conflict_stream_array as table_alias --- conflict_stream_array -where 1 = 1 -), __dbt__cte__conflict_stream_array_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_array_ab1 -select - cast("id" as text) as "id", - conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_array_ab1 --- conflict_stream_array -where 1 = 1 -), __dbt__cte__conflict_stream_array_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_array_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_array as text), '') as text)) as _airbyte_conflict_stream_array_hashid, - tmp.* -from __dbt__cte__conflict_stream_array_ab2 tmp --- conflict_stream_array -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_array_ab3 -select - "id", - conflict_stream_array, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_array_hashid -from __dbt__cte__conflict_stream_array_ab3 --- conflict_stream_array from "postgres".test_normalization._airbyte_raw_conflict_stream_array -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql deleted file mode 100644 index ac5cffb8d00d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql +++ /dev/null @@ -1,58 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_name__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_name_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_name -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - - jsonb_extract_path(table_alias._airbyte_data, 'conflict_stream_name') - as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_conflict_stream_name as table_alias --- conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_name_ab1 -select - cast("id" as text) as "id", - cast(conflict_stream_name as - jsonb -) as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_name_ab1 --- conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_name_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || 
coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_hashid, - tmp.* -from __dbt__cte__conflict_stream_name_ab2 tmp --- conflict_stream_name -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_name_ab3 -select - "id", - conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_name_hashid -from __dbt__cte__conflict_stream_name_ab3 --- conflict_stream_name from "postgres".test_normalization._airbyte_raw_conflict_stream_name -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql deleted file mode 100644 index 4aa2c420ed45d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql +++ /dev/null @@ -1,55 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_name___conflict_stream_name__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."conflict_stream_name_conflict_stream_name" -select - _airbyte_conflict_stream_name_2_hashid, - jsonb_extract_path_text(conflict_stream_name, 'groups') as groups, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" as table_alias --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name -where 1 = 1 -and conflict_stream_name is not null -), __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 -select - _airbyte_conflict_stream_name_2_hashid, - cast(groups as text) as groups, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 -select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as text), '') || '-' || coalesce(cast(groups as text), '') as text)) as _airbyte_conflict_stream_name_3_hashid, - tmp.* -from __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 tmp --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 -select - _airbyte_conflict_stream_name_2_hashid, - groups, - _airbyte_ab_id, - 
_airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_name_3_hashid -from __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 --- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql deleted file mode 100644 index 82dfb023674e5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql +++ /dev/null @@ -1,59 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_name_conflict_stream_name__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."conflict_stream_name" -select - _airbyte_conflict_stream_name_hashid, - - jsonb_extract_path(table_alias.conflict_stream_name, 'conflict_stream_name') - as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."conflict_stream_name" as table_alias --- conflict_stream_name at conflict_stream_name/conflict_stream_name -where 1 = 1 -and conflict_stream_name is not null -), __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 -select - _airbyte_conflict_stream_name_hashid, - cast(conflict_stream_name as - jsonb -) as conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 --- conflict_stream_name at conflict_stream_name/conflict_stream_name -where 1 = 1 -), __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 -select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_2_hashid, - tmp.* -from __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 tmp --- conflict_stream_name at conflict_stream_name/conflict_stream_name -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 -select - _airbyte_conflict_stream_name_hashid, - conflict_stream_name, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_name_2_hashid -from __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 --- conflict_stream_name at conflict_stream_name/conflict_stream_name from 
"postgres".test_normalization."conflict_stream_name" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql deleted file mode 100644 index 09a4fa01de977..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql +++ /dev/null @@ -1,56 +0,0 @@ - - - create table "postgres".test_normalization."conflict_stream_scalar__dbt_tmp" - as ( - -with __dbt__cte__conflict_stream_scalar_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_scalar -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'conflict_stream_scalar') as conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar as table_alias --- conflict_stream_scalar -where 1 = 1 -), __dbt__cte__conflict_stream_scalar_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__conflict_stream_scalar_ab1 -select - cast("id" as text) as "id", - cast(conflict_stream_scalar as - bigint -) as conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__conflict_stream_scalar_ab1 --- conflict_stream_scalar -where 1 = 1 -), __dbt__cte__conflict_stream_scalar_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__conflict_stream_scalar_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_scalar as text), '') as text)) as _airbyte_conflict_stream_scalar_hashid, - tmp.* -from __dbt__cte__conflict_stream_scalar_ab2 tmp --- conflict_stream_scalar -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__conflict_stream_scalar_ab3 -select - "id", - conflict_stream_scalar, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_conflict_stream_scalar_hashid -from __dbt__cte__conflict_stream_scalar_ab3 --- conflict_stream_scalar from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql deleted file mode 100644 index 31d2176c3888c..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql +++ /dev/null @@ -1,54 +0,0 @@ - - - create table "postgres".test_normalization."non_nested_stream_wi__lting_into_long_names__dbt_tmp" - as ( - -with __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names as table_alias --- non_nested_stream_wi__lting_into_long_names -where 1 = 1 -), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 -select - cast("id" as text) as "id", - cast("date" as text) as "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 --- non_nested_stream_wi__lting_into_long_names -where 1 = 1 -), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_non_nested___nto_long_names_hashid, - tmp.* -from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 tmp --- non_nested_stream_wi__lting_into_long_names -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 -select - "id", - "date", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_non_nested___nto_long_names_hashid -from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 --- non_nested_stream_wi__lting_into_long_names from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql deleted file mode 100644 index 7af2f04f81f87..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql +++ /dev/null @@ -1,56 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- 
depends_on: "postgres".test_normalization._airbyte_raw_unnest_alias -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path(_airbyte_data, 'children') as children, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_unnest_alias as table_alias --- unnest_alias -where 1 = 1 -), __dbt__cte__unnest_alias_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_ab1 -select - cast("id" as - bigint -) as "id", - children, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_ab1 --- unnest_alias -where 1 = 1 -), __dbt__cte__unnest_alias_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(children as text), '') as text)) as _airbyte_unnest_alias_hashid, - tmp.* -from __dbt__cte__unnest_alias_ab2 tmp --- unnest_alias -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_ab3 -select - "id", - children, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_unnest_alias_hashid -from __dbt__cte__unnest_alias_ab3 --- unnest_alias from "postgres".test_normalization._airbyte_raw_unnest_alias -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql deleted file mode 100644 index 6688069a62f01..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql +++ /dev/null @@ -1,61 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias_childre__column___with__quotes__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."unnest_alias_children_owner" - -select - _airbyte_owner_hashid, - jsonb_extract_path_text(_airbyte_nested_data, 'currency') as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."unnest_alias_children_owner" as table_alias --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -cross join jsonb_array_elements( - case jsonb_typeof("column`_'with""_quotes") - when 'array' then "column`_'with""_quotes" - else '[]' end - ) as _airbyte_nested_data -where 1 = 1 -and "column`_'with""_quotes" is not null -), __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 -select - _airbyte_owner_hashid, - cast(currency as text) as currency, - _airbyte_ab_id, - 
_airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -where 1 = 1 -), __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 -select - md5(cast(coalesce(cast(_airbyte_owner_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_column___with__quotes_hashid, - tmp.* -from __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 tmp --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 -select - _airbyte_owner_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_column___with__quotes_hashid -from __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 --- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes from "postgres".test_normalization."unnest_alias_children_owner" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql deleted file mode 100644 index 779394d5765dc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql +++ /dev/null @@ -1,70 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias_children__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_children_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."unnest_alias" - -select - _airbyte_unnest_alias_hashid, - jsonb_extract_path_text(_airbyte_nested_data, 'ab_id') as ab_id, - - jsonb_extract_path(_airbyte_nested_data, 'owner') - as "owner", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."unnest_alias" as table_alias --- children at unnest_alias/children -cross join jsonb_array_elements( - case jsonb_typeof(children) - when 'array' then children - else '[]' end - ) as _airbyte_nested_data -where 1 = 1 -and children is not null -), __dbt__cte__unnest_alias_children_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_children_ab1 -select - _airbyte_unnest_alias_hashid, - cast(ab_id as - bigint -) as ab_id, - cast("owner" as - jsonb -) as "owner", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_children_ab1 --- children at unnest_alias/children -where 1 = 1 -), __dbt__cte__unnest_alias_children_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_children_ab2 -select - 
md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as text), '') || '-' || coalesce(cast(ab_id as text), '') || '-' || coalesce(cast("owner" as text), '') as text)) as _airbyte_children_hashid, - tmp.* -from __dbt__cte__unnest_alias_children_ab2 tmp --- children at unnest_alias/children -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_children_ab3 -select - _airbyte_unnest_alias_hashid, - ab_id, - "owner", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_children_hashid -from __dbt__cte__unnest_alias_children_ab3 --- children at unnest_alias/children from "postgres".test_normalization."unnest_alias" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql deleted file mode 100644 index 651e1c11914eb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql +++ /dev/null @@ -1,60 +0,0 @@ - - - create table "postgres".test_normalization."unnest_alias_children_owner__dbt_tmp" - as ( - -with __dbt__cte__unnest_alias_children_owner_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization."unnest_alias_children" -select - _airbyte_children_hashid, - jsonb_extract_path_text("owner", 'owner_id') as owner_id, - jsonb_extract_path("owner", 'column`_''with"_quotes') as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization."unnest_alias_children" as table_alias --- owner at unnest_alias/children/owner -where 1 = 1 -and "owner" is not null -), __dbt__cte__unnest_alias_children_owner_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__unnest_alias_children_owner_ab1 -select - _airbyte_children_hashid, - cast(owner_id as - bigint -) as owner_id, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__unnest_alias_children_owner_ab1 --- owner at unnest_alias/children/owner -where 1 = 1 -), __dbt__cte__unnest_alias_children_owner_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__unnest_alias_children_owner_ab2 -select - md5(cast(coalesce(cast(_airbyte_children_hashid as text), '') || '-' || coalesce(cast(owner_id as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_owner_hashid, - tmp.* -from __dbt__cte__unnest_alias_children_owner_ab2 tmp --- owner at unnest_alias/children/owner -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__unnest_alias_children_owner_ab3 -select - _airbyte_children_hashid, - owner_id, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_owner_hashid -from __dbt__cte__unnest_alias_children_owner_ab3 --- owner at 
unnest_alias/children/owner from "postgres".test_normalization."unnest_alias_children" -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml deleted file mode 100755 index 013a446b320a5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- modified_models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate - exchange_rate: test_normalization._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml deleted file mode 100644 index 12745c37a1508..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate - exchange_rate: test_normalization._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx - 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number - multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: 
test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization._airbyte_raw_types_testing - types_testing_ab2: test_normalization._airbyte_raw_types_testing - types_testing_stg: test_normalization._airbyte_raw_types_testing - types_testing_scd: test_normalization._airbyte_raw_types_testing - types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql deleted file mode 100644 index dac6628377db2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ /dev/null @@ -1,73 +0,0 @@ - - - - create table "postgres".test_normalization."1_prefix_startwith_number_scd" - as ( - --- depends_on: ref('1_prefix_startwith_number_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" - -- 1_prefix_startwith_number from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - "date", - "text", - "date" as _airbyte_start_at, - lag("date") over ( - partition by "id" - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_1_prefix_startwith_number_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - "date", - "text", - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_1_prefix_startwith_number_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git 
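
This model and the _scd models that follow all share one Type 2 Slowly Changing Dimension shape: the stream's cursor column becomes _airbyte_start_at, lag() over the primary key supplies _airbyte_end_at from the next-newer version, and row_number() = 1 marks the currently active row. A stripped-down sketch of just the windowing core, with hypothetical staging / id / updated_at names standing in for the generated ones:

    -- Core of the Type 2 SCD pattern (Postgres), assuming
    -- staging(id bigint, updated_at timestamptz, ...).
    select
        id,
        updated_at as _airbyte_start_at,
        lag(updated_at) over (              -- the next-newer version's cursor
            partition by id                  -- closes this row's validity window
            order by updated_at is null asc, updated_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by id
            order by updated_at is null asc, updated_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row
    from staging;

Because the window is ordered newest-first, the latest version gets a null _airbyte_end_at and _airbyte_active_row = 1; older versions are closed by the row that superseded them.
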
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index ba66363a77f5a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,79 +0,0 @@ - - - - create table "postgres".test_normalization."dedup_cdc_excluded_scd" - as ( - --- depends_on: ref('dedup_cdc_excluded_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" - -- dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_lsn as _airbyte_start_at, - lag(_ab_cdc_lsn) over ( - partition by "id" - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as text), cast(_ab_cdc_updated_at as text) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index c9440958247d2..0000000000000 --- 
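
The CDC variants — dedup_cdc_excluded just above, and pos_dedup_cdcx further down — extend the same SCD shape in one way: the replication-log cursor (_ab_cdc_lsn or _ab_cdc_updated_at, with _ab_cdc_log_pos breaking position ties) drives the window ordering, and a row counts as active only if it is both the latest version and not tombstoned. A sketch of that deletion gate alone, with a hypothetical cdc_staging input:

    -- CDC-aware active-row flag (Postgres): latest row per key, but a
    -- non-null _ab_cdc_deleted_at tombstone forces the record inactive.
    select
        id,
        _ab_cdc_lsn,
        _ab_cdc_deleted_at,
        case when row_number() over (
            partition by id
            order by _ab_cdc_lsn is null asc, _ab_cdc_lsn desc
        ) = 1 and _ab_cdc_deleted_at is null
        then 1 else 0 end as _airbyte_active_row
    from cdc_staging;

This is why source-side deletes still surface in the history table while disappearing from the deduplicated final view.
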
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,83 +0,0 @@ - - - - create table "postgres".test_normalization."dedup_exchange_rate_scd" - as ( - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" - -- dedup_exchange_rate from "postgres".test_normalization._airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "date" as _airbyte_start_at, - lag("date") over ( - partition by "id", currency, cast(nzd as text) - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id", currency, cast(nzd as text) - order by - "date" is null asc, - "date" desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql deleted file mode 100644 index 9eb7e6e349ab2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ /dev/null @@ -1,81 +0,0 @@ - - - - create table "postgres".test_normalization."multiple_column_names_conflicts_scd" - as ( - --- depends_on: ref('multiple_column_names_conflicts_stg') -with - -input_data as ( - select * - from 
"postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" - -- multiple_column_names_conflicts from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - "User Id", - user_id, - "User id", - "user id", - "User@Id", - userid, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( - partition by "id" - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_multiple_co__ames_conflicts_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - "User Id", - user_id, - "User id", - "user id", - "User@Id", - userid, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_multiple_co__ames_conflicts_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql deleted file mode 100644 index 450815d1ccc51..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ /dev/null @@ -1,83 +0,0 @@ - - - - create table "postgres".test_normalization."pos_dedup_cdcx_scd" - as ( - --- depends_on: ref('pos_dedup_cdcx_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" - -- pos_dedup_cdcx from "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _ab_cdc_updated_at as _airbyte_start_at, - lag(_ab_cdc_updated_at) over ( - partition by "id" - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - 
_ab_cdc_log_pos desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _ab_cdc_log_pos desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as text), cast(_ab_cdc_updated_at as text), cast(_ab_cdc_log_pos as text) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_log_pos as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index 31e25e700b601..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - - create table "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" - as ( - --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" - -- renamed_dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - _ab_cdc_updated_at, - _ab_cdc_updated_at as _airbyte_start_at, - lag(_ab_cdc_updated_at) over ( - partition by "id" - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - 
_airbyte_renamed_dedup_cdc_excluded_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - _ab_cdc_updated_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql deleted file mode 100644 index 238d662a00cfe..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql +++ /dev/null @@ -1,73 +0,0 @@ - - - - create table "postgres".test_normalization."types_testing_scd" - as ( - --- depends_on: ref('types_testing_stg') -with - -input_data as ( - select * - from "postgres"._airbyte_test_normalization."types_testing_stg" - -- types_testing from "postgres".test_normalization._airbyte_raw_types_testing -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, - "id", - airbyte_integer_column, - nullable_airbyte_integer_column, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( - partition by "id" - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by "id" - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_types_testing_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - "id", - airbyte_integer_column, - nullable_airbyte_integer_column, - _airbyte_start_at, - 
_airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_types_testing_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql deleted file mode 100644 index aad38834ac949..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - - create table "postgres".test_normalization."1_prefix_startwith_number" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."1_prefix_startwith_number_scd" -select - _airbyte_unique_key, - "id", - "date", - "text", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_1_prefix_startwith_number_hashid -from "postgres".test_normalization."1_prefix_startwith_number_scd" --- 1_prefix_startwith_number from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql deleted file mode 100644 index 94b51fa8be0bd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql +++ /dev/null @@ -1,51 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" - as ( - -with __dbt__cte__1_prefix_startwith_number_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - jsonb_extract_path_text(_airbyte_data, 'text') as "text", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number as table_alias --- 1_prefix_startwith_number -where 1 = 1 - -), __dbt__cte__1_prefix_startwith_number_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__1_prefix_startwith_number_ab1 -select - cast("id" as - bigint -) as "id", - cast(nullif("date", '') as - date -) as "date", - cast("text" as text) as "text", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__1_prefix_startwith_number_ab1 --- 1_prefix_startwith_number -where 1 = 
1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__1_prefix_startwith_number_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast("text" as text), '') as text)) as _airbyte_1_prefix_startwith_number_hashid, - tmp.* -from __dbt__cte__1_prefix_startwith_number_ab2 tmp --- 1_prefix_startwith_number -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index 8b6b3e96bc28c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,26 +0,0 @@ - - - - create table "postgres".test_normalization."dedup_cdc_excluded" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."dedup_cdc_excluded_scd" -select - _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from "postgres".test_normalization."dedup_cdc_excluded_scd" --- dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql deleted file mode 100644 index 1c688fb2faa56..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,59 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" - as ( - -with __dbt__cte__dedup_cdc_excluded_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'name') as "name", - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias --- dedup_cdc_excluded -where 1 = 1 - -), __dbt__cte__dedup_cdc_excluded_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema 
type --- depends_on: __dbt__cte__dedup_cdc_excluded_ab1 -select - cast("id" as - bigint -) as "id", - cast("name" as text) as "name", - cast(_ab_cdc_lsn as - float -) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as - float -) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as - float -) as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__dedup_cdc_excluded_ab1 --- dedup_cdc_excluded -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_cdc_excluded_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("name" as text), '') || '-' || coalesce(cast(_ab_cdc_lsn as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') as text)) as _airbyte_dedup_cdc_excluded_hashid, - tmp.* -from __dbt__cte__dedup_cdc_excluded_ab2 tmp --- dedup_cdc_excluded -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 93578cc1edcaf..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - - create table "postgres".test_normalization."dedup_exchange_rate" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."dedup_exchange_rate_scd" -select - _airbyte_unique_key, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from "postgres".test_normalization."dedup_exchange_rate_scd" --- dedup_exchange_rate from "postgres".test_normalization._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 128ec051327d6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,69 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" - as ( - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_dedup_exchange_rate -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - 
jsonb_extract_path_text(_airbyte_data, 'currency') as currency, - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, - jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - jsonb_extract_path_text(_airbyte_data, 'HKD_special___characters') as hkd_special___characters, - jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, - jsonb_extract_path_text(_airbyte_data, 'USD') as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast("id" as - bigint -) as "id", - cast(currency as text) as currency, - cast(nullif("date", '') as - date -) as "date", - cast(nullif(timestamp_col, '') as - timestamp with time zone -) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql deleted file mode 100644 index eba2d8af4fcee..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql +++ /dev/null @@ -1,28 +0,0 @@ - - - - create table "postgres".test_normalization."multiple_column_names_conflicts" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."multiple_column_names_conflicts_scd" -select - _airbyte_unique_key, - "id", - "User Id", - user_id, - "User id", - "user id", - "User@Id", - userid, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_multiple_co__ames_conflicts_hashid -from "postgres".test_normalization."multiple_column_names_conflicts_scd" --- multiple_column_names_conflicts from 
"postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index dbb4726faf8f3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,65 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" - as ( - -with __dbt__cte__multiple_column_names_conflicts_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'User Id') as "User Id", - jsonb_extract_path_text(_airbyte_data, 'user_id') as user_id, - jsonb_extract_path_text(_airbyte_data, 'User id') as "User id", - jsonb_extract_path_text(_airbyte_data, 'user id') as "user id", - jsonb_extract_path_text(_airbyte_data, 'User@Id') as "User@Id", - jsonb_extract_path_text(_airbyte_data, 'UserId') as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias --- multiple_column_names_conflicts -where 1 = 1 - -), __dbt__cte__multiple_column_names_conflicts_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 -select - cast("id" as - bigint -) as "id", - cast("User Id" as text) as "User Id", - cast(user_id as - float -) as user_id, - cast("User id" as - float -) as "User id", - cast("user id" as - float -) as "user id", - cast("User@Id" as text) as "User@Id", - cast(userid as - float -) as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__multiple_column_names_conflicts_ab1 --- multiple_column_names_conflicts -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("User Id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("User id" as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast("User@Id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_co__ames_conflicts_hashid, - tmp.* -from __dbt__cte__multiple_column_names_conflicts_ab2 tmp --- multiple_column_names_conflicts -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql deleted file mode 100644 index 59b2696002723..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql +++ /dev/null @@ -1,27 +0,0 @@ - - - - create table "postgres".test_normalization."pos_dedup_cdcx" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."pos_dedup_cdcx_scd" -select - _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from "postgres".test_normalization."pos_dedup_cdcx_scd" --- pos_dedup_cdcx from "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql deleted file mode 100644 index 1b28a6bd09ddc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql +++ /dev/null @@ -1,63 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" - as ( - -with __dbt__cte__pos_dedup_cdcx_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'name') as "name", - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias --- pos_dedup_cdcx -where 1 = 1 - -), __dbt__cte__pos_dedup_cdcx_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__pos_dedup_cdcx_ab1 -select - cast("id" as - bigint -) as "id", - cast("name" as text) as "name", - cast(_ab_cdc_lsn as - float -) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as - float -) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as - float -) as _ab_cdc_deleted_at, - cast(_ab_cdc_log_pos as - float -) as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__pos_dedup_cdcx_ab1 --- pos_dedup_cdcx -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__pos_dedup_cdcx_ab2 -select - 
md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("name" as text), '') || '-' || coalesce(cast(_ab_cdc_lsn as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_log_pos as text), '') as text)) as _airbyte_pos_dedup_cdcx_hashid, - tmp.* -from __dbt__cte__pos_dedup_cdcx_ab2 tmp --- pos_dedup_cdcx -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index 36303d71ef60e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,23 +0,0 @@ - - - - create table "postgres".test_normalization."renamed_dedup_cdc_excluded" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" -select - _airbyte_unique_key, - "id", - _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" --- renamed_dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql deleted file mode 100644 index 7fba3805f3967..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,49 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" - as ( - -with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 - -), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__renamed_dedup_cdc_excluded_ab1 -select - cast("id" as - bigint -) as "id", - 
cast(_ab_cdc_updated_at as - float -) as _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__renamed_dedup_cdc_excluded_ab1 --- renamed_dedup_cdc_excluded -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__renamed_dedup_cdc_excluded_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') as text)) as _airbyte_renamed_dedup_cdc_excluded_hashid, - tmp.* -from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp --- renamed_dedup_cdc_excluded -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing.sql deleted file mode 100644 index 424c1918935bd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - - create table "postgres".test_normalization."types_testing" - as ( - --- Final base SQL model --- depends_on: "postgres".test_normalization."types_testing_scd" -select - _airbyte_unique_key, - "id", - airbyte_integer_column, - nullable_airbyte_integer_column, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_types_testing_hashid -from "postgres".test_normalization."types_testing_scd" --- types_testing from "postgres".test_normalization._airbyte_raw_types_testing -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing_stg.sql deleted file mode 100644 index 7eccd56d06093..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing_stg.sql +++ /dev/null @@ -1,53 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."types_testing_stg" - as ( - -with __dbt__cte__types_testing_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_types_testing -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'airbyte_integer_column') as airbyte_integer_column, - jsonb_extract_path_text(_airbyte_data, 'nullable_airbyte_integer_column') as nullable_airbyte_integer_column, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_types_testing as table_alias --- types_testing -where 1 = 1 - -), __dbt__cte__types_testing_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema 
type --- depends_on: __dbt__cte__types_testing_ab1 -select - cast("id" as - bigint -) as "id", - cast(airbyte_integer_column as - bigint -) as airbyte_integer_column, - cast(nullable_airbyte_integer_column as - bigint -) as nullable_airbyte_integer_column, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__types_testing_ab1 --- types_testing -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__types_testing_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(airbyte_integer_column as text), '') || '-' || coalesce(cast(nullable_airbyte_integer_column as text), '') as text)) as _airbyte_types_testing_hashid, - tmp.* -from __dbt__cte__types_testing_ab2 tmp --- types_testing -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 2773af0d8fa35..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,107 +0,0 @@ - - - create table "postgres".test_normalization."exchange_rate__dbt_tmp" - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'currency') as currency, - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, - jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - jsonb_extract_path_text(_airbyte_data, 'HKD_special___characters') as hkd_special___characters, - jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, - jsonb_extract_path_text(_airbyte_data, 'USD') as usd, - jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", - jsonb_extract_path_text(_airbyte_data, 'datetime_tz') as datetime_tz, - jsonb_extract_path_text(_airbyte_data, 'datetime_no_tz') as datetime_no_tz, - jsonb_extract_path_text(_airbyte_data, 'time_tz') as time_tz, - jsonb_extract_path_text(_airbyte_data, 'time_no_tz') as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast("id" as - bigint -) as "id", - cast(currency as text) as currency, - cast(nullif("date", '') as - date -) as "date", - cast(nullif(timestamp_col, '') as - timestamp with time zone -) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - 
cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", - cast(nullif(datetime_tz, '') as - timestamp with time zone -) as datetime_tz, - cast(nullif(datetime_no_tz, '') as - timestamp -) as datetime_no_tz, - cast(nullif(time_tz, '') as - time with time zone -) as time_tz, - cast(nullif(time_no_tz, '') as - time -) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') as text)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "column`_'with""_quotes", - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql deleted file mode 100644 index f6697dcec7577..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['text'], ['text']) }} as {{ adapter.quote('text') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ 
current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} as table_alias --- 1_prefix_startwith_number -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql deleted file mode 100644 index a9dd516725858..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('1_prefix_startwith_number_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ adapter.quote('text') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('text') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('1_prefix_startwith_number_ab1') }} --- 1_prefix_startwith_number -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 99a03831a8ba8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - 
{{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 3d8803e27a664..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, - cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_cdc_excluded_ab1') }} --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 5009554c3391c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ 
adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 187fc05ccc6fe..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql deleted file mode 100644 index ca2b2520a2585..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and 
extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, - {{ json_extract_scalar('_airbyte_data', ['datetime_tz'], ['datetime_tz']) }} as datetime_tz, - {{ json_extract_scalar('_airbyte_data', ['datetime_no_tz'], ['datetime_no_tz']) }} as datetime_no_tz, - {{ json_extract_scalar('_airbyte_data', ['time_tz'], ['time_tz']) }} as time_tz, - {{ json_extract_scalar('_airbyte_data', ['time_no_tz'], ['time_no_tz']) }} as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql deleted file mode 100644 index 0f457acbee982..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('exchange_rate_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - cast({{ adapter.quote('column`_\'with""_quotes') }} as {{ dbt_utils.type_string() }}) as {{ 
adapter.quote('column`_\'with""_quotes') }}, - cast({{ empty_string_to_null('datetime_tz') }} as {{ type_timestamp_with_timezone() }}) as datetime_tz, - cast({{ empty_string_to_null('datetime_no_tz') }} as {{ type_timestamp_without_timezone() }}) as datetime_no_tz, - cast({{ empty_string_to_null('time_tz') }} as {{ type_time_with_timezone() }}) as time_tz, - cast({{ empty_string_to_null('time_no_tz') }} as {{ type_time_without_timezone() }}) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('exchange_rate_ab1') }} --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql deleted file mode 100644 index 789086fe147aa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ /dev/null @@ -1,29 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'hkd_special___characters', - 'nzd', - 'usd', - adapter.quote('column`_\'with""_quotes'), - 'datetime_tz', - 'datetime_no_tz', - 'time_tz', - 'time_no_tz', - ]) }} as _airbyte_exchange_rate_hashid, - tmp.* -from {{ ref('exchange_rate_ab2') }} tmp --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql deleted file mode 100644 index 3444e2fe46f97..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['User Id'], ['User Id']) }} as {{ adapter.quote('User Id') }}, - {{ json_extract_scalar('_airbyte_data', ['user_id'], ['user_id']) }} as user_id, - {{ 
json_extract_scalar('_airbyte_data', ['User id'], ['User id']) }} as {{ adapter.quote('User id') }}, - {{ json_extract_scalar('_airbyte_data', ['user id'], ['user id']) }} as {{ adapter.quote('user id') }}, - {{ json_extract_scalar('_airbyte_data', ['User@Id'], ['User@Id']) }} as {{ adapter.quote('User@Id') }}, - {{ json_extract_scalar('_airbyte_data', ['UserId'], ['UserId']) }} as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} as table_alias --- multiple_column_names_conflicts -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql deleted file mode 100644 index 263d011d1bdeb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('multiple_column_names_conflicts_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('User Id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('User Id') }}, - cast(user_id as {{ dbt_utils.type_float() }}) as user_id, - cast({{ adapter.quote('User id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('User id') }}, - cast({{ adapter.quote('user id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('user id') }}, - cast({{ adapter.quote('User@Id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('User@Id') }}, - cast(userid as {{ dbt_utils.type_float() }}) as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('multiple_column_names_conflicts_ab1') }} --- multiple_column_names_conflicts -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql deleted file mode 100644 index ee8f1538acb46..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ 
"top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_log_pos'], ['_ab_cdc_log_pos']) }} as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias --- pos_dedup_cdcx -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql deleted file mode 100644 index 96c252758b6d4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('pos_dedup_cdcx_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, - cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, - cast(_ab_cdc_log_pos as {{ dbt_utils.type_float() }}) as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('pos_dedup_cdcx_ab1') }} --- pos_dedup_cdcx -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql deleted file mode 100644 index fbe40aebf3c7c..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql deleted file mode 100644 index f0b99802de8b2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('renamed_dedup_cdc_excluded_ab1') }} --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab1.sql deleted file mode 100644 index 2fca430a9c393..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} 
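(Editor's note on the pattern being deleted here: every staging model above follows the same generated three-step layout per stream. An _ab1 model pulls typed scalars out of the raw _airbyte_data JSON blob with json_extract_scalar, an _ab2 model casts each column to the SQL type implied by the JSON schema, and an _ab3/_stg model hashes the business columns with dbt_utils.surrogate_key. A minimal sketch of the first two steps, assuming a hypothetical stream named my_stream with a single id column; my_stream and the raw table name are placeholders, while the macros shown are the ones used throughout these deleted files:

-- my_stream_ab1.sql (illustrative sketch, not part of the deleted output)
{{ config(schema = "_airbyte_test_normalization", tags = ["top-level-intermediate"]) }}
select
    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_my_stream') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}

-- my_stream_ab2.sql: cast each extracted column to its declared type
select
    cast(id as {{ dbt_utils.type_bigint() }}) as id,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('my_stream_ab1') }}
where 1 = 1
)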
--- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_types_testing') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['airbyte_integer_column'], ['airbyte_integer_column']) }} as airbyte_integer_column, - {{ json_extract_scalar('_airbyte_data', ['nullable_airbyte_integer_column'], ['nullable_airbyte_integer_column']) }} as nullable_airbyte_integer_column, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_types_testing') }} as table_alias --- types_testing -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab2.sql deleted file mode 100644 index da93832f7f778..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab2.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('types_testing_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast(airbyte_integer_column as {{ dbt_utils.type_bigint() }}) as airbyte_integer_column, - cast(nullable_airbyte_integer_column as {{ dbt_utils.type_bigint() }}) as nullable_airbyte_integer_column, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('types_testing_ab1') }} --- types_testing -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql deleted file mode 100644 index 01e0c49d1c7c4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ /dev/null @@ -1,163 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='1_prefix_startwith_number' - ) - %} - {# - If the final 
table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], - tags = [ "top-level" ] -) }} --- depends_on: ref('1_prefix_startwith_number_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('1_prefix_startwith_number_stg') }} - -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('1_prefix_startwith_number_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column 
types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('1_prefix_startwith_number_stg') }} - -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('text') }}, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by {{ adapter.quote('id') }} - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_1_prefix_startwith_number_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('text') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_1_prefix_startwith_number_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index 5affe9825e3be..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,169 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - 
identifier='dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_cdc_excluded_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on 
schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_lsn as _airbyte_start_at, - lag(_ab_cdc_lsn) over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index ef0cf7e1e95f5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,177 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set 
final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from 
input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql deleted file mode 100644 index 77d393c856892..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ /dev/null @@ -1,171 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='multiple_column_names_conflicts' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('multiple_column_names_conflicts_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('multiple_column_names_conflicts_stg') }} - -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('multiple_column_names_conflicts_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('multiple_column_names_conflicts_stg') }} - -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('User Id') }}, - user_id, - {{ adapter.quote('User id') }}, - {{ adapter.quote('user id') }}, - {{ adapter.quote('User@Id') }}, - userid, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( - partition by {{ adapter.quote('id') }} - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ 
adapter.quote('id') }} - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_multiple_co__ames_conflicts_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('User Id') }}, - user_id, - {{ adapter.quote('User id') }}, - {{ adapter.quote('user id') }}, - {{ adapter.quote('User@Id') }}, - userid, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_multiple_co__ames_conflicts_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql deleted file mode 100644 index ff471c6abaab1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ /dev/null @@ -1,173 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='pos_dedup_cdcx' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
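(Editor's note: the post_hook comment just above states that the generated delete is equivalent to a naive in/not-in pair but uses a left join for performance. Expanded into plain SQL, with the hypothetical concrete names final_table and scd_table standing in for the Jinja relations, the two equivalent forms are:

-- naive form, as described in the comment (reference only):
delete from final_table
where _airbyte_unique_key in (select _airbyte_unique_key from scd_table)
  and _airbyte_unique_key not in (
    select _airbyte_unique_key from scd_table where _airbyte_active_row = 1
  );

-- left-join form actually generated: delete keys that have no active rows left
delete from final_table
where _airbyte_unique_key in (
    select recent_records.unique_key
    from (
        select distinct _airbyte_unique_key as unique_key from scd_table
    ) recent_records
    left join (
        select _airbyte_unique_key as unique_key,
               count(_airbyte_unique_key) as active_count
        from scd_table
        where _airbyte_active_row = 1
        group by _airbyte_unique_key
    ) active_counts
    on recent_records.unique_key = active_counts.unique_key
    where active_count is null or active_count = 0
);
)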
- delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('pos_dedup_cdcx_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('pos_dedup_cdcx_stg') }} - -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('pos_dedup_cdcx_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('pos_dedup_cdcx_stg') }} - -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _ab_cdc_updated_at as _airbyte_start_at, - lag(_ab_cdc_updated_at) over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _ab_cdc_log_pos desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when 
row_number() over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _ab_cdc_log_pos desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_log_pos as {{ dbt_utils.type_string() }}) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at', '_ab_cdc_log_pos' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index d8da713c68711..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,161 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='renamed_dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data - union 
all
-    select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data
-),
-{% else %}
-input_data as (
-    select *
-    from {{ ref('renamed_dedup_cdc_excluded_stg') }}
-    -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }}
-),
-{% endif %}
-scd_data as (
-    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
-    select
-        {{ dbt_utils.surrogate_key([
-            adapter.quote('id'),
-        ]) }} as _airbyte_unique_key,
-        {{ adapter.quote('id') }},
-        _ab_cdc_updated_at,
-        _ab_cdc_updated_at as _airbyte_start_at,
-        lag(_ab_cdc_updated_at) over (
-            partition by {{ adapter.quote('id') }}
-            order by
-                _ab_cdc_updated_at is null asc,
-                _ab_cdc_updated_at desc,
-                _airbyte_emitted_at desc
-        ) as _airbyte_end_at,
-        case when row_number() over (
-            partition by {{ adapter.quote('id') }}
-            order by
-                _ab_cdc_updated_at is null asc,
-                _ab_cdc_updated_at desc,
-                _airbyte_emitted_at desc
-        ) = 1 then 1 else 0 end as _airbyte_active_row,
-        _airbyte_ab_id,
-        _airbyte_emitted_at,
-        _airbyte_renamed_dedup_cdc_excluded_hashid
-    from input_data
-),
-dedup_data as (
-    select
-        -- we need to ensure de-duplicated rows for merge/update queries
-        -- additionally, we generate a unique key for the scd table
-        row_number() over (
-            partition by
-                _airbyte_unique_key,
-                _airbyte_start_at,
-                _airbyte_emitted_at
-            order by _airbyte_active_row desc, _airbyte_ab_id
-        ) as _airbyte_row_num,
-        {{ dbt_utils.surrogate_key([
-            '_airbyte_unique_key',
-            '_airbyte_start_at',
-            '_airbyte_emitted_at'
-        ]) }} as _airbyte_unique_key_scd,
-        scd_data.*
-    from scd_data
-)
-select
-    _airbyte_unique_key,
-    _airbyte_unique_key_scd,
-    {{ adapter.quote('id') }},
-    _ab_cdc_updated_at,
-    _airbyte_start_at,
-    _airbyte_end_at,
-    _airbyte_active_row,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_renamed_dedup_cdc_excluded_hashid
-from dedup_data where _airbyte_row_num = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql
deleted file mode 100644
index 0a0b409c90b72..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql
+++ /dev/null
@@ -1,163 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}],
-    unique_key = "_airbyte_unique_key_scd",
-    schema = "test_normalization",
-    post_hook = ["
-      {%
-      set final_table_relation = adapter.get_relation(
-          database=this.database,
-          schema=this.schema,
-          identifier='types_testing'
-      )
-      %}
-      {#
-      If the final table doesn't exist, then obviously we can't delete anything from it.
-      Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync)
-      So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway)
-      #}
-      {%
-      if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
-      %}
-      -- Delete records which are no longer active:
-      -- This query is equivalent, but the left join version is more performant:
-      -- delete from final_table where unique_key in (
-      --     select unique_key from scd_table where 1 = 1
-      -- ) and unique_key not in (
-      --     select unique_key from scd_table where active_row = 1
-      -- )
-      -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
-      -- entries that were _updated_ recently. This is because a deleted record will have an SCD record
-      -- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
-      delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in (
-          select recent_records.unique_key
-          from (
-              select distinct _airbyte_unique_key as unique_key
-              from {{ this }}
-              where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('types_testing')) }}
-          ) recent_records
-          left join (
-              select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
-              from {{ this }}
-              where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('types_testing')) }}
-              group by _airbyte_unique_key
-          ) active_counts
-          on recent_records.unique_key = active_counts.unique_key
-          where active_count is null or active_count = 0
-      )
-      {% else %}
-      -- We have to have a non-empty query, so just do a noop delete
-      delete from {{ this }} where 1=0
-      {% endif %}
-    ","delete from _airbyte_test_normalization.types_testing_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.types_testing_stg)"],
-    tags = [ "top-level" ]
-) }}
--- depends_on: ref('types_testing_stg')
-with
-{% if is_incremental() %}
-new_data as (
-    -- retrieve incremental "new" data
-    select
-        *
-    from {{ ref('types_testing_stg') }}
-    -- types_testing from {{ source('test_normalization', '_airbyte_raw_types_testing') }}
-    where 1 = 1
-    {{ incremental_clause('_airbyte_emitted_at', this) }}
-),
-new_data_ids as (
-    -- build a subset of _airbyte_unique_key from rows that are new
-    select distinct
-        {{ dbt_utils.surrogate_key([
-            adapter.quote('id'),
-        ]) }} as _airbyte_unique_key
-    from new_data
-),
-empty_new_data as (
-    -- build an empty table to only keep the table's column types
-    select * from new_data where 1 = 0
-),
-previous_active_scd_data as (
-    -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
-    select
-        {{ star_intersect(ref('types_testing_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
-    from {{ this }} as this_data
-    -- make a join with new_data using primary key to filter active data that need to be updated only
-    join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
-    -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes)
-    left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
-    where _airbyte_active_row = 1
-),
-input_data as (
-    select {{ dbt_utils.star(ref('types_testing_stg')) }} from new_data
-    union all
-    select {{ dbt_utils.star(ref('types_testing_stg')) }} from previous_active_scd_data
-),
-{% else %}
-input_data as (
-    select *
-    from {{ ref('types_testing_stg') }}
-    -- types_testing from {{ source('test_normalization', '_airbyte_raw_types_testing') }}
-),
-{% endif %}
-scd_data as (
-    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
-    select
-        {{ dbt_utils.surrogate_key([
-            adapter.quote('id'),
-        ]) }} as _airbyte_unique_key,
-        {{ adapter.quote('id') }},
-        airbyte_integer_column,
-        nullable_airbyte_integer_column,
-        _airbyte_emitted_at as _airbyte_start_at,
-        lag(_airbyte_emitted_at) over (
-            partition by {{ adapter.quote('id') }}
-            order by
-                _airbyte_emitted_at is null asc,
-                _airbyte_emitted_at desc,
-                _airbyte_emitted_at desc
-        ) as _airbyte_end_at,
-        case when row_number() over (
-            partition by {{ adapter.quote('id') }}
-            order by
-                _airbyte_emitted_at is null asc,
-                _airbyte_emitted_at desc,
-                _airbyte_emitted_at desc
-        ) = 1 then 1 else 0 end as _airbyte_active_row,
-        _airbyte_ab_id,
-        _airbyte_emitted_at,
-        _airbyte_types_testing_hashid
-    from input_data
-),
-dedup_data as (
-    select
-        -- we need to ensure de-duplicated rows for merge/update queries
-        -- additionally, we generate a unique key for the scd table
-        row_number() over (
-            partition by
-                _airbyte_unique_key,
-                _airbyte_start_at,
-                _airbyte_emitted_at
-            order by _airbyte_active_row desc, _airbyte_ab_id
-        ) as _airbyte_row_num,
-        {{ dbt_utils.surrogate_key([
-            '_airbyte_unique_key',
-            '_airbyte_start_at',
-            '_airbyte_emitted_at'
-        ]) }} as _airbyte_unique_key_scd,
-        scd_data.*
-    from scd_data
-)
-select
-    _airbyte_unique_key,
-    _airbyte_unique_key_scd,
-    {{ adapter.quote('id') }},
-    airbyte_integer_column,
-    nullable_airbyte_integer_column,
-    _airbyte_start_at,
-    _airbyte_end_at,
-    _airbyte_active_row,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_types_testing_hashid
-from dedup_data where _airbyte_row_num = 1
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql
deleted file mode 100644
index f3ea9897b65a4..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql
+++ /dev/null
@@ -1,23 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('1_prefix_startwith_number_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    {{ adapter.quote('date') }},
-    {{ adapter.quote('text') }},
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_1_prefix_startwith_number_hashid
-from {{ ref('1_prefix_startwith_number_scd') }}
--- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql
deleted file mode 100644
index c387201c974c8..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
-    unique_key = '_airbyte_ab_id',
-    schema = "_airbyte_test_normalization",
-    tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('1_prefix_startwith_number_ab2') }}
-select
-    {{ dbt_utils.surrogate_key([
-        adapter.quote('id'),
-        adapter.quote('date'),
-        adapter.quote('text'),
-    ]) }} as _airbyte_1_prefix_startwith_number_hashid,
-    tmp.*
-from {{ ref('1_prefix_startwith_number_ab2') }} tmp
--- 1_prefix_startwith_number
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql
deleted file mode 100644
index 32d70c680aa9d..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('dedup_cdc_excluded_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    {{ adapter.quote('name') }},
-    _ab_cdc_lsn,
-    _ab_cdc_updated_at,
-    _ab_cdc_deleted_at,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_dedup_cdc_excluded_hashid
-from {{ ref('dedup_cdc_excluded_scd') }}
--- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql
deleted file mode 100644
index b0cd4bf7cb134..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
-    unique_key = '_airbyte_ab_id',
-    schema = "_airbyte_test_normalization",
-    tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('dedup_cdc_excluded_ab2') }}
-select
-    {{ dbt_utils.surrogate_key([
-        adapter.quote('id'),
-        adapter.quote('name'),
-        '_ab_cdc_lsn',
-        '_ab_cdc_updated_at',
-        '_ab_cdc_deleted_at',
-    ]) }} as _airbyte_dedup_cdc_excluded_hashid,
-    tmp.*
-from {{ ref('dedup_cdc_excluded_ab2') }} tmp
--- dedup_cdc_excluded
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql
deleted file mode 100644
index 42f7540dc6b9f..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('dedup_exchange_rate_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    currency,
-    {{ adapter.quote('date') }},
-    timestamp_col,
-    {{ adapter.quote('HKD@spéçiäl & characters') }},
-    hkd_special___characters,
-    nzd,
-    usd,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_dedup_exchange_rate_hashid
-from {{ ref('dedup_exchange_rate_scd') }}
--- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql
deleted file mode 100644
index f892feed3fe7d..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
-    unique_key = '_airbyte_ab_id',
-    schema = "_airbyte_test_normalization",
-    tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('dedup_exchange_rate_ab2') }}
-select
-    {{ dbt_utils.surrogate_key([
-        adapter.quote('id'),
-        'currency',
-        adapter.quote('date'),
-        'timestamp_col',
-        adapter.quote('HKD@spéçiäl & characters'),
-        'hkd_special___characters',
-        'nzd',
-        'usd',
-    ]) }} as _airbyte_dedup_exchange_rate_hashid,
-    tmp.*
-from {{ ref('dedup_exchange_rate_ab2') }} tmp
--- dedup_exchange_rate
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql
deleted file mode 100644
index 3451ce406b4d2..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('multiple_column_names_conflicts_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    {{ adapter.quote('User Id') }},
-    user_id,
-    {{ adapter.quote('User id') }},
-    {{ adapter.quote('user id') }},
-    {{ adapter.quote('User@Id') }},
-    userid,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_multiple_co__ames_conflicts_hashid
-from {{ ref('multiple_column_names_conflicts_scd') }}
--- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql
deleted file mode 100644
index c549b49128a62..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
-    unique_key = '_airbyte_ab_id',
-    schema = "_airbyte_test_normalization",
-    tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('multiple_column_names_conflicts_ab2') }}
-select
-    {{ dbt_utils.surrogate_key([
-        adapter.quote('id'),
-        adapter.quote('User Id'),
-        'user_id',
-        adapter.quote('User id'),
-        adapter.quote('user id'),
-        adapter.quote('User@Id'),
-        'userid',
-    ]) }} as _airbyte_multiple_co__ames_conflicts_hashid,
-    tmp.*
-from {{ ref('multiple_column_names_conflicts_ab2') }} tmp
--- multiple_column_names_conflicts
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql
deleted file mode 100644
index 57ddb1908b9d6..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql
+++ /dev/null
@@ -1,26 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('pos_dedup_cdcx_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    {{ adapter.quote('name') }},
-    _ab_cdc_lsn,
-    _ab_cdc_updated_at,
-    _ab_cdc_deleted_at,
-    _ab_cdc_log_pos,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_pos_dedup_cdcx_hashid
-from {{ ref('pos_dedup_cdcx_scd') }}
--- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql
deleted file mode 100644
index 692867ceaf4ed..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql
+++ /dev/null
@@ -1,23 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
-    unique_key = '_airbyte_ab_id',
-    schema = "_airbyte_test_normalization",
-    tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('pos_dedup_cdcx_ab2') }}
-select
-    {{ dbt_utils.surrogate_key([
-        adapter.quote('id'),
-        adapter.quote('name'),
-        '_ab_cdc_lsn',
-        '_ab_cdc_updated_at',
-        '_ab_cdc_deleted_at',
-        '_ab_cdc_log_pos',
-    ]) }} as _airbyte_pos_dedup_cdcx_hashid,
-    tmp.*
-from {{ ref('pos_dedup_cdcx_ab2') }} tmp
--- pos_dedup_cdcx
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql
deleted file mode 100644
index 603af9d4f80c3..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    _ab_cdc_updated_at,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_renamed_dedup_cdc_excluded_hashid
-from {{ ref('renamed_dedup_cdc_excluded_scd') }}
--- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql
deleted file mode 100644
index 96371bb4931a9..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
-    unique_key = '_airbyte_ab_id',
-    schema = "_airbyte_test_normalization",
-    tags = [ "top-level-intermediate" ]
-) }}
--- SQL model to build a hash column based on the values of this record
--- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab2') }}
-select
-    {{ dbt_utils.surrogate_key([
-        adapter.quote('id'),
-        '_ab_cdc_updated_at',
-    ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid,
-    tmp.*
-from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp
--- renamed_dedup_cdc_excluded
-where 1 = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing.sql
deleted file mode 100644
index 8f979379656dc..0000000000000
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing.sql
+++ /dev/null
@@ -1,23 +0,0 @@
-{{ config(
-    indexes = [{'columns':['_airbyte_unique_key'],'unique':True}],
-    unique_key = "_airbyte_unique_key",
-    schema = "test_normalization",
-    tags = [ "top-level" ]
-) }}
--- Final base SQL model
--- depends_on: {{ ref('types_testing_scd') }}
-select
-    _airbyte_unique_key,
-    {{ adapter.quote('id') }},
-    airbyte_integer_column,
-    nullable_airbyte_integer_column,
-    _airbyte_ab_id,
-    _airbyte_emitted_at,
-    {{ current_timestamp() }} as _airbyte_normalized_at,
-    _airbyte_types_testing_hashid
-from {{ ref('types_testing_scd') }}
--- types_testing from {{ source('test_normalization', '_airbyte_raw_types_testing') }}
-where 1 = 1
-and _airbyte_active_row = 1
-{{ incremental_clause('_airbyte_emitted_at', this) }}
-
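Every deleted final model above follows the same shape: read from the stream's `_scd` counterpart, keep only rows with `_airbyte_active_row = 1`, and append `{{ incremental_clause('_airbyte_emitted_at', this) }}` so incremental runs only rescan recently emitted rows. The macro itself lives elsewhere in base-normalization; as a hedged sketch (the argument names and exact expansion are assumptions here, not copied from this diff), it plausibly renders along these lines:

    {% macro incremental_clause(col_emitted_at, tablename) -%}
    {%- if is_incremental() %}
    -- only scan rows at or after the newest cursor value already materialized;
    -- coalesce(..., true) keeps every row while the target table is still empty
    and coalesce(
        cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >=
            (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}),
        true)
    {%- endif %}
    {%- endmacro %}

On a full refresh `is_incremental()` is false, the clause renders empty, and the model falls back to the plain `where 1 = 1` scan; combined with `unique_key = "_airbyte_unique_key"` in the config block, dbt merges the re-selected rows into the final table instead of duplicating them.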
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing_stg.sql deleted file mode 100644 index 3eabf9e4ae69a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing_stg.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('types_testing_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'airbyte_integer_column', - 'nullable_airbyte_integer_column', - ]) }} as _airbyte_types_testing_hashid, - tmp.* -from {{ ref('types_testing_ab2') }} tmp --- types_testing -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 72e4956780448..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,30 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index f51802427655e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_1_prefix_startwith_number - - name: 
_airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_multiple_column_names_conflicts - - name: _airbyte_raw_pos_dedup_cdcx - - name: _airbyte_raw_renamed_dedup_cdc_excluded - - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 99a03831a8ba8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 3d8803e27a664..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, - cast(_ab_cdc_lsn as {{ dbt_utils.type_float() 
}}) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_cdc_excluded_ab1') }} --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 8dd3aff00d2cd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index b5e700b36aa6a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to 
cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('id') }}, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast(new_column as {{ dbt_utils.type_float() }}) as new_column, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_bigint() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql deleted file mode 100644 index ba88ffa22b0d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql deleted file mode 100644 index e6cf7ee1e5760..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('exchange_rate_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('id') }}, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast(new_column as {{ dbt_utils.type_float() }}) as new_column, - cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - cast({{ adapter.quote('column`_\'with""_quotes') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('exchange_rate_ab1') }} --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql deleted file mode 100644 index 96c96a4d4799c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'new_column', - adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'nzd', - 'usd', - adapter.quote('column`_\'with""_quotes'), - ]) }} as _airbyte_exchange_rate_hashid, - tmp.* -from {{ ref('exchange_rate_ab2') }} tmp --- exchange_rate -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql deleted file mode 100644 index dfa39c2a71eb7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, - {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, - {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 72f80140e0076..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} -select - cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, - cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, - cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, - cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, - cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('renamed_dedup_cdc_excluded_ab1') }} --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff 
--git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index 5affe9825e3be..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,169 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_cdc_excluded_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_lsn as _airbyte_start_at, - lag(_ab_cdc_lsn) over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_lsn is null asc, - _ab_cdc_lsn desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - 
_airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 7e6225fb7cfc4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,177 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, - {{ adapter.quote('date') }} as _airbyte_start_at, - lag({{ adapter.quote('date') }}) over ( - partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by cast({{ adapter.quote('id') }} 
as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - {{ adapter.quote('date') }} is null asc, - {{ adapter.quote('date') }} desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index 96f720b3d2659..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,169 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='renamed_dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. 
This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], - tags = [ "top-level" ] -) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - 
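
The post_hook above prunes final-table rows whose key no longer has any active SCD row, and its comment explains why it is phrased as a left join rather than two IN subqueries. Stripped of Jinja, the statement reduces to this (final_table and scd_table stand in for the resolved relations):

    delete from final_table
    where final_table._airbyte_unique_key in (
        select recent_records.unique_key
        from (
            -- keys touched by recently re-normalized SCD entries
            select distinct _airbyte_unique_key as unique_key
            from scd_table
        ) recent_records
        left join (
            -- keys that still have at least one active row
            select _airbyte_unique_key as unique_key,
                   count(_airbyte_unique_key) as active_count
            from scd_table
            where _airbyte_active_row = 1
            group by _airbyte_unique_key
        ) active_counts
          on recent_records.unique_key = active_counts.unique_key
        where active_counts.active_count is null
           or active_counts.active_count = 0
    );
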
_ab_cdc_updated_at as _airbyte_start_at, - lag(_ab_cdc_updated_at) over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by {{ adapter.quote('id') }} - order by - _ab_cdc_updated_at is null asc, - _ab_cdc_updated_at desc, - _ab_cdc_updated_at desc, - _airbyte_emitted_at desc - ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index 32d70c680aa9d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_cdc_excluded_scd') }} -select - _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_cdc_excluded_hashid -from {{ ref('dedup_cdc_excluded_scd') }} --- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql 
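
incremental_clause appears in nearly every model in this output: it expands to nothing on a full refresh and to a cursor predicate on incremental runs, which is why the models all carry a "where 1 = 1" for it to append to. The macro itself ships with base-normalization's dbt project; a plausible minimal reconstruction for Postgres (a sketch under that assumption, not the shipped code) is:

    {% macro incremental_clause(col_emitted_at, tablename) %}
      {% if is_incremental() %}
        and coalesce(
            cast({{ col_emitted_at }} as timestamp with time zone) >=
                (select max(cast({{ col_emitted_at }} as timestamp with time zone))
                 from {{ tablename }}),
            true)   -- coalesce(..., true): keep all rows while the target is still empty
      {% endif %}
    {% endmacro %}
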
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql deleted file mode 100644 index b0cd4bf7cb134..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_cdc_excluded_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - adapter.quote('name'), - '_ab_cdc_lsn', - '_ab_cdc_updated_at', - '_ab_cdc_deleted_at', - ]) }} as _airbyte_dedup_cdc_excluded_hashid, - tmp.* -from {{ ref('dedup_cdc_excluded_ab2') }} tmp --- dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 3e51ad4d72565..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,28 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 35c866ac4d364..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = 
[{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'new_column', - adapter.quote('date'), - 'timestamp_col', - adapter.quote('HKD@spéçiäl & characters'), - 'nzd', - 'usd', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index 672118dcf045c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} -select - _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_renamed_dedup_cdc_excluded_hashid -from {{ ref('renamed_dedup_cdc_excluded_scd') }} --- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql deleted file mode 100644 index b2d5002b934a3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,22 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab2') }} -select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - adapter.quote('name'), - '_ab_cdc_lsn', - '_ab_cdc_updated_at', - '_ab_cdc_deleted_at', - ]) }} as 
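
These _stg models exist only to attach a deterministic hash id to each record. On Postgres, dbt_utils.surrogate_key compiles to an md5 over the '-'-joined, null-coalesced text casts of its arguments; the compiled form is visible verbatim further down in this diff, in the inlined exchange_rate_ab3 CTE. Trimmed to three columns, the expansion looks like:

    select
        md5(cast(
            coalesce(cast("id" as text), '')      || '-' ||
            coalesce(cast(currency as text), '')  || '-' ||
            coalesce(cast(nzd as text), '')
        as text)) as _airbyte_dedup_exchange_rate_hashid,
        tmp.*
    from dedup_exchange_rate_ab2 tmp

Coalescing to '' means a null and an empty string hash identically, but within one stream the hash is stable across runs, which is all the downstream dedup logic needs.
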
_airbyte_renamed_dedup_cdc_excluded_hashid, - tmp.* -from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp --- renamed_dedup_cdc_excluded -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 40b5ffb3f87d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml deleted file mode 100644 index 6a5d7bdc09a16..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 2 -sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql deleted file mode 100644 index 6fe661c181e0b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."1_prefix_startwith_number_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "1_prefix_startwith_number_scd__dbt_tmp" - ); - - - insert into 
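
Note how the generated sources.yml quotes only the database name, while the models reach for adapter.quote on columns such as date or HKD@spéçiäl & characters that Postgres would otherwise case-fold or reject. A toy demonstration of why the quoting matters (illustrative table name):

    -- unquoted identifiers are folded to lower case; quoted ones are taken verbatim
    create table quoting_demo ("id" bigint, "HKD@spéçiäl & characters" float);
    select "HKD@spéçiäl & characters" from quoting_demo;   -- works
    -- select HKD@spéçiäl & characters from quoting_demo;  -- syntax error without quotes
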
"postgres".test_normalization."1_prefix_startwith_number_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "text", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "text", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid" - from "1_prefix_startwith_number_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index a1fba0a6d7ff4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_cdc_excluded_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_cdc_excluded_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" - from "dedup_cdc_excluded_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 0155cd0360b1e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_exchange_rate_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_exchange_rate_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", 
"_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql deleted file mode 100644 index 76e8539124374..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."multiple_column_names_conflicts_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "multiple_column_names_conflicts_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."multiple_column_names_conflicts_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid" - from "multiple_column_names_conflicts_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql deleted file mode 100644 index cafd98c1c127f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."pos_dedup_cdcx_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "pos_dedup_cdcx_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."pos_dedup_cdcx_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid") - ( - select 
"_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid" - from "pos_dedup_cdcx_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index e6d1d5fd01605..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "renamed_dedup_cdc_excluded_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from "renamed_dedup_cdc_excluded_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql deleted file mode 100644 index 8388a44777b85..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."types_testing_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "types_testing_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."types_testing_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid" - from 
"types_testing_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql deleted file mode 100644 index 181af872ea063..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."1_prefix_startwith_number" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "1_prefix_startwith_number__dbt_tmp" - ); - - - insert into "postgres".test_normalization."1_prefix_startwith_number" ("_airbyte_unique_key", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid") - ( - select "_airbyte_unique_key", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid" - from "1_prefix_startwith_number__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql deleted file mode 100644 index d9a69c73ea41a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "1_prefix_startwith_number_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" ("_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "1_prefix_startwith_number_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index b3012059b462d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from 
"postgres".test_normalization."dedup_cdc_excluded" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_cdc_excluded__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" - from "dedup_cdc_excluded__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql deleted file mode 100644 index d9f833d441bfa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_cdc_excluded_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "dedup_cdc_excluded_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 871b95c607c94..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_exchange_rate" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_exchange_rate__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 1be7a088845ed..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_exchange_rate_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" ("_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "dedup_exchange_rate_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql deleted file mode 100644 index 525dc2add9077..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."multiple_column_names_conflicts" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "multiple_column_names_conflicts__dbt_tmp" - ); - - - insert into "postgres".test_normalization."multiple_column_names_conflicts" ("_airbyte_unique_key", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid") - ( - select "_airbyte_unique_key", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid" - from "multiple_column_names_conflicts__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index 391889ecb40a9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "multiple_column_names_conflicts_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" ("_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "multiple_column_names_conflicts_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql deleted file mode 100644 index 1d618406e5c6d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."pos_dedup_cdcx" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "pos_dedup_cdcx__dbt_tmp" - ); - - - insert into "postgres".test_normalization."pos_dedup_cdcx" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid") - ( - select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid" - from "pos_dedup_cdcx__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql deleted file mode 100644 index c627c7bea1b0e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" - where (_airbyte_ab_id) in ( - select 
(_airbyte_ab_id) - from "pos_dedup_cdcx_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" ("_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "pos_dedup_cdcx_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index de66b557fa186..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."renamed_dedup_cdc_excluded" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "renamed_dedup_cdc_excluded__dbt_tmp" - ); - - - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from "renamed_dedup_cdc_excluded__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql deleted file mode 100644 index 6711170dbc9c4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "renamed_dedup_cdc_excluded_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "renamed_dedup_cdc_excluded_stg__dbt_tmp" - ) - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing.sql deleted file mode 100644 index f01bbf8941931..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."types_testing" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "types_testing__dbt_tmp" - ); - - - insert into "postgres".test_normalization."types_testing" ("_airbyte_unique_key", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid") - ( - select "_airbyte_unique_key", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid" - from "types_testing__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing_stg.sql deleted file mode 100644 index 1295c519d0d7d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."types_testing_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "types_testing_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."types_testing_stg" ("_airbyte_types_testing_hashid", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_types_testing_hashid", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "types_testing_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 2773af0d8fa35..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,107 +0,0 @@ - - - create table "postgres".test_normalization."exchange_rate__dbt_tmp" - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob 
stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'currency') as currency, - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, - jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - jsonb_extract_path_text(_airbyte_data, 'HKD_special___characters') as hkd_special___characters, - jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, - jsonb_extract_path_text(_airbyte_data, 'USD') as usd, - jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", - jsonb_extract_path_text(_airbyte_data, 'datetime_tz') as datetime_tz, - jsonb_extract_path_text(_airbyte_data, 'datetime_no_tz') as datetime_no_tz, - jsonb_extract_path_text(_airbyte_data, 'time_tz') as time_tz, - jsonb_extract_path_text(_airbyte_data, 'time_no_tz') as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast("id" as - bigint -) as "id", - cast(currency as text) as currency, - cast(nullif("date", '') as - date -) as "date", - cast(nullif(timestamp_col, '') as - timestamp with time zone -) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", - cast(nullif(datetime_tz, '') as - timestamp with time zone -) as datetime_tz, - cast(nullif(datetime_no_tz, '') as - timestamp -) as datetime_no_tz, - cast(nullif(time_tz, '') as - time with time zone -) as time_tz, - cast(nullif(time_no_tz, '') as - time -) as time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') as text)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - 
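
This compiled exchange_rate model shows the usual three-step lineage inlined as CTEs: ab1 extracts every field from the raw JSON blob as text, ab2 casts (with nullif(x, '') guarding empty strings ahead of date/time casts), and ab3 hashes. The extract-and-cast step in isolation, against the same raw table:

    select
        cast(jsonb_extract_path_text(_airbyte_data, 'id') as bigint)   as "id",
        jsonb_extract_path_text(_airbyte_data, 'currency')             as currency,
        cast(nullif(jsonb_extract_path_text(_airbyte_data, 'date'), '')
             as date)                                                  as "date",
        cast(nullif(jsonb_extract_path_text(_airbyte_data, 'timestamp_col'), '')
             as timestamp with time zone)                              as timestamp_col,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        now() as _airbyte_normalized_at
    from "postgres".test_normalization._airbyte_raw_exchange_rate;

The nullif guard matters because raw values arrive as JSON strings: casting '' straight to date or timestamp with time zone would raise an error rather than produce null.
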
nzd, - usd, - "column`_'with""_quotes", - datetime_tz, - datetime_no_tz, - time_tz, - time_no_tz, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql deleted file mode 100644 index a1fba0a6d7ff4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_cdc_excluded_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_cdc_excluded_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" - from "dedup_cdc_excluded_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 521c016411b86..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_exchange_rate_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_exchange_rate_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", 
"_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql deleted file mode 100644 index 3a30f5175e851..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "renamed_dedup_cdc_excluded_scd__dbt_tmp" - ); - - - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from "renamed_dedup_cdc_excluded_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql deleted file mode 100644 index b3012059b462d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_cdc_excluded" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_cdc_excluded__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" - from "dedup_cdc_excluded__dbt_tmp" - ) - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql deleted file mode 100644 index d9f833d441bfa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_cdc_excluded_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "dedup_cdc_excluded_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 9c85a59293773..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."dedup_exchange_rate" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_exchange_rate__dbt_tmp" - ); - - - insert into "postgres".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 1cca439173314..0000000000000 --- 
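Two details in the deleted hunks above are worth calling out: the *_stg staging tables deduplicate on _airbyte_ab_id, the per-record identifier, while the final and SCD tables deduplicate on the derived _airbyte_unique_key; and columns such as "HKD@spéçiäl & characters" and "column`_'with""_quotes" deliberately stress identifier quoting, where a literal double quote inside a double-quoted SQL identifier is escaped by doubling it. A small self-contained illustration against a hypothetical table:

-- The doubled "" inside the quoted identifier denotes one literal " character.
create table quoting_demo (
  "column`_'with""_quotes" text,
  "HKD@spéçiäl & characters" float
);
insert into quoting_demo values ('hello', 1.5);
select "column`_'with""_quotes", "HKD@spéçiäl & characters" from quoting_demo;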
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_exchange_rate_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" ("_airbyte_dedup_exchange_rate_hashid", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_dedup_exchange_rate_hashid", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "dedup_exchange_rate_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql deleted file mode 100644 index f4ce2e8305828..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres".test_normalization."renamed_dedup_cdc_excluded" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "renamed_dedup_cdc_excluded__dbt_tmp" - ); - - - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - ( - select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from "renamed_dedup_cdc_excluded__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql deleted file mode 100644 index 18d5b4ab827c2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "renamed_dedup_cdc_excluded_stg__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" 
("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "renamed_dedup_cdc_excluded_stg__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 155df4698f2d1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,89 +0,0 @@ - - - create table "postgres".test_normalization."exchange_rate__dbt_tmp" - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate -select - jsonb_extract_path_text(_airbyte_data, 'id') as "id", - jsonb_extract_path_text(_airbyte_data, 'currency') as currency, - jsonb_extract_path_text(_airbyte_data, 'new_column') as new_column, - jsonb_extract_path_text(_airbyte_data, 'date') as "date", - jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, - jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, - jsonb_extract_path_text(_airbyte_data, 'USD') as usd, - jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast("id" as - float -) as "id", - cast(currency as text) as currency, - cast(new_column as - float -) as new_column, - cast(nullif("date", '') as - date -) as "date", - cast(nullif(timestamp_col, '') as - timestamp with time zone -) as timestamp_col, - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as 
text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - "id", - currency, - new_column, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - nzd, - usd, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml deleted file mode 100755 index 767544968e0b7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - +transient: false - +pre-hook: SET enable_case_sensitive_identifier to TRUE - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab3: 
test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - some_stream_that_was_empty_ab1: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_ab2: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_stg: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_scalar_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_array_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - conflict_stream_array_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - conflict_stream_array_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - conflict_stream_array: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - unnest_alias_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias: test_normalization_xjvlg._airbyte_raw_unnest_alias - arrays_ab1: test_normalization_xjvlg._airbyte_raw_arrays - arrays_ab2: test_normalization_xjvlg._airbyte_raw_arrays - arrays_ab3: test_normalization_xjvlg._airbyte_raw_arrays - arrays: test_normalization_xjvlg._airbyte_raw_arrays - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - 
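The models_to_source block being deleted here (it continues below) maps every generated model, including each intermediate _ab1/_ab2/_ab3 stage, back to the one raw table it is ultimately derived from, which lets the normalization macros resolve a model's raw source by name. A hypothetical dbt-SQL sketch of reading such a var; the model name used for the lookup and the usage itself are illustrative, not the project's actual macro code:

-- Assumed usage: var('models_to_source') returns the mapping above, and
-- indexing it by model name yields a 'schema.table' string to inline.
{% set raw_table = var('models_to_source')['arrays_ab1'] %}
select count(*) as raw_rows
from {{ raw_table }}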
nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - unnest_alias_children_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children: test_normalization_xjvlg._airbyte_raw_unnest_alias - arrays_nested_array_parent_ab1: test_normalization_xjvlg._airbyte_raw_arrays - arrays_nested_array_parent_ab2: test_normalization_xjvlg._airbyte_raw_arrays - arrays_nested_array_parent_ab3: test_normalization_xjvlg._airbyte_raw_arrays - arrays_nested_array_parent: test_normalization_xjvlg._airbyte_raw_arrays - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - unnest_alias_children_owner_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner: 
test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes: test_normalization_xjvlg._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql deleted file mode 100644 index 9b59d6d77c88c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ /dev/null @@ -1,78 +0,0 @@ - - - - create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" - - - compound sortkey(_airbyte_active_row,_airbyte_unique_key_scd,_airbyte_emitted_at) - - as ( - --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') -with - -input_data as ( - select * - from "integrationtests"._airbyte_test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_stg" - -- nested_stream_with_complex_columns_resulting_into_long_names from "integrationtests".test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast(id as text), '') as text)) as _airbyte_unique_key, - id, - date, - "partition", - date as _airbyte_start_at, - lag(date) over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - date, - "partition", - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - 
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index 184fa2bf11042..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - - create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" - - - compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) - - as ( - --- Final base SQL model --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" -select - _airbyte_unique_key, - id, - date, - "partition", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" --- nested_stream_with_complex_columns_resulting_into_long_names from "integrationtests".test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index 4e1c7b1f39427..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,69 +0,0 @@ - - - - create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" - - - compound sortkey(_airbyte_emitted_at) - - as ( - -with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - "partition"."double_array_data" as double_array_data, - "partition"."DATA" as data, - _airbyte_ab_id, - _airbyte_emitted_at, 
- getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and "partition" is not null - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - double_array_data, - data, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 -select - md5(cast(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as text), '') || '-' || coalesce(cast(json_serialize(double_array_data) as text), '') || '-' || coalesce(cast(json_serialize(data) as text), '') as text)) as _airbyte_partition_hashid, - tmp.* -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - double_array_data, - data, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_partition_hashid -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql deleted file mode 100644 index e19271e39a6fb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ /dev/null @@ -1,74 +0,0 @@ - - - - create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" - - - compound sortkey(_airbyte_emitted_at) - - as ( - -with 
__dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" - - with joined as ( - select - table_alias._airbyte_partition_hashid as _airbyte_hashid, - _airbyte_nested_data - from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.data as _airbyte_nested_data - ) -select - _airbyte_partition_hashid, - case when _airbyte_nested_data."currency" != '' then _airbyte_nested_data."currency" end as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -left join joined on _airbyte_partition_hashid = joined._airbyte_hashid -where 1 = 1 -and data is not null - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 -select - _airbyte_partition_hashid, - cast(currency as text) as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 -select - md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 tmp --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_data_hashid -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql 
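The joined CTE in the partition_data hunk just above uses Redshift's PartiQL idiom for unnesting a SUPER array: naming the array column after its parent table in the FROM clause iterates the array, yielding one row per element. A condensed sketch with illustrative names:

-- Each element of the SUPER array column "data" becomes one output row.
select p._airbyte_partition_hashid,
       elem."currency" as currency
from my_schema.partition_table as p,
     p.data as elem
where p.data is not null;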
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index 7e38b76f87fe4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ /dev/null @@ -1,74 +0,0 @@ - - - - create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" - - - compound sortkey(_airbyte_emitted_at) - - as ( - -with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" - - with joined as ( - select - table_alias._airbyte_partition_hashid as _airbyte_hashid, - _airbyte_nested_data - from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.double_array_data as _airbyte_nested_data - ) -select - _airbyte_partition_hashid, - case when _airbyte_nested_data."id" != '' then _airbyte_nested_data."id" end as id, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -left join joined on _airbyte_partition_hashid = joined._airbyte_hashid -where 1 = 1 -and double_array_data is not null - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 -select - _airbyte_partition_hashid, - cast(id as text) as id, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 -select - md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(id as text), '') as text)) as _airbyte_double_array_data_hashid, - tmp.* -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- 
Final base SQL model --- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 -select - _airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" -where 1 = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql deleted file mode 100644 index ed49a5e916064..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_xjvlg", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias --- nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql deleted file mode 100644 index 19ab94bca1518..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_xjvlg", - tags = [ 
"top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} -select - cast(id as {{ dbt_utils.type_string() }}) as id, - cast(date as {{ dbt_utils.type_string() }}) as date, - cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} --- nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql deleted file mode 100644 index 18a21b4729811..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - schema = "_airbyte_test_normalization_xjvlg", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, - {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as data, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} as table_alias --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql deleted file mode 100644 index 4cc3285a5f6e2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - sort = 
"_airbyte_emitted_at", - schema = "_airbyte_test_normalization_xjvlg", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'data') }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value('data'), ['currency'], ['currency']) }} as currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -{{ cross_join_unnest('partition', 'data') }} -where 1 = 1 -and data is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql deleted file mode 100644 index 4876b27d7cc0f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - schema = "_airbyte_test_normalization_xjvlg", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'double_array_data') }} -select - _airbyte_partition_hashid, - {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -{{ cross_join_unnest('partition', 'double_array_data') }} -where 1 = 1 -and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql 
deleted file mode 100644 index a629e4de4e5d6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ /dev/null @@ -1,163 +0,0 @@ -{{ config( - sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization_xjvlg", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_complex_columns_resulting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization_xjvlg.nested_stream_with_complex_columns_resulting_into_long_names_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, - date as _airbyte_start_at, - lag(date) over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure 
de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - date, - {{ adapter.quote('partition') }}, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index f95f159eedc9f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ config( - sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], - unique_key = "_airbyte_unique_key", - schema = "test_normalization_xjvlg", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} -select - _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} --- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index 18a73cf63b7f7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - schema = "test_normalization_xjvlg", - 
tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} -select - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - double_array_data, - data, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_partition_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql deleted file mode 100644 index ad3d8a9a61b53..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - schema = "test_normalization_xjvlg", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3') }} -select - _airbyte_partition_hashid, - currency, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_data_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3') }} --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index 2059cb60a01ae..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - schema = "test_normalization_xjvlg", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} -select - 
_airbyte_partition_hashid, - id, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_double_array_data_hashid -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml deleted file mode 100644 index 56faa01c65dc7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -sources: -- name: test_normalization_namespace - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names -- name: test_normalization_xjvlg - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_arrays - - name: _airbyte_raw_conflict_stream_array - - name: _airbyte_raw_conflict_stream_name - - name: _airbyte_raw_conflict_stream_scalar - - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - - name: _airbyte_raw_some_stream_that_was_empty - - name: _airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql deleted file mode 100644 index 45c63e057a5ed..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ); - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", 
"_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index e32bb140a0990..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ); - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") - ( - select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index 9944a91ca6425..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" ("_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") - ( - select "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql deleted file mode 100644 index 52b4bd4fc5f41..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") - ( - select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index 91aaa5e85cc0a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") - ( - select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml deleted file mode 100755 index c645baf3c5fe8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- modified_models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- 
macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - +transient: false - +pre-hook: SET enable_case_sensitive_identifier to TRUE - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate: test_normalization_bhhpj._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml deleted file mode 100644 index 70d0b5b4fa3b6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - +transient: false - +pre-hook: SET enable_case_sensitive_identifier to TRUE - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - 
airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate: test_normalization_bhhpj._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - 1_prefix_startwith_number_ab1: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_ab2: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_stg: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - multiple_column_names_conflicts_ab1: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing_ab2: test_normalization_bhhpj._airbyte_raw_types_testing - 
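Both project files pin quoting behavior and register SET enable_case_sensitive_identifier to TRUE as a model pre-hook. On Redshift this session parameter makes double-quoted identifiers case-sensitive rather than folded to lowercase, which the generated models rely on when raw JSON fields differ only by case (the multiple_column_names_conflicts fixture below has exactly this shape). A minimal illustration, using a hypothetical relation:

    SET enable_case_sensitive_identifier TO true;
    -- with the flag on, these resolve to two distinct columns;
    -- with it off, both quoted names fold to "user id" and collide
    select "User Id", "user id" from my_schema.my_table;  -- hypothetical table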
types_testing_stg: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing_scd: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing: test_normalization_bhhpj._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 3c1032d3297f2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,88 +0,0 @@ - - - - create table - "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" - - - compound sortkey(_airbyte_active_row,_airbyte_unique_key_scd,_airbyte_emitted_at) - - as ( - --- depends_on: ref('dedup_exchange_rate_stg') -with - -input_data as ( - select * - from "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg" - -- dedup_exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - date as _airbyte_start_at, - lag(date) over ( - partition by id, currency, cast(nzd as text) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as text) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index b6903fe4ceb0d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,34 +0,0 @@ - - - - create table - "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" - - - compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) - - as ( - --- Final base SQL model --- depends_on: "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" -select - _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" --- dedup_exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index e2bd3830cb423..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,92 +0,0 @@ - - - create table - "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" - - - compound sortkey(_airbyte_emitted_at) - - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, - case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, - case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, - case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, - case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, - case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, - case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to 
its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as text) as currency, - cast(nullif(date::varchar, '') as - date -) as date, - cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone -) as timestamp_col, - cast("hkd@spéçiäl & characters" as - float -) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 903a3141f6256..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,66 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, - case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, - case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, - case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & 
characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, - case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, - case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as text) as currency, - cast(nullif(date::varchar, '') as - date -) as date, - cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone -) as timestamp_col, - cast("hkd@spéçiäl & characters" as - float -) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql deleted file mode 100644 index b496abf0c5ecd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ /dev/null @@ -1,62 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization_bhhpj."multiple_column_names_conflicts_stg__dbt_tmp" as ( - -with __dbt__cte__multiple_column_names_conflicts_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when _airbyte_data."User Id" != '' then _airbyte_data."User Id" end as "user id", - case when _airbyte_data."user_id" != '' then _airbyte_data."user_id" end as user_id, - case when _airbyte_data."User id" != '' then _airbyte_data."User id" end as "user id_1", - case when _airbyte_data."user 
id" != '' then _airbyte_data."user id" end as "user id_2", - case when _airbyte_data."User@Id" != '' then _airbyte_data."User@Id" end as "user@id", - case when _airbyte_data."UserId" != '' then _airbyte_data."UserId" end as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts as table_alias --- multiple_column_names_conflicts -where 1 = 1 - -), __dbt__cte__multiple_column_names_conflicts_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 -select - cast(id as - bigint -) as id, - cast("user id" as text) as "user id", - cast(user_id as - float -) as user_id, - cast("user id_1" as - float -) as "user id_1", - cast("user id_2" as - float -) as "user id_2", - cast("user@id" as text) as "user@id", - cast(userid as - float -) as userid, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__multiple_column_names_conflicts_ab1 --- multiple_column_names_conflicts -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("user id_1" as text), '') || '-' || coalesce(cast("user id_2" as text), '') || '-' || coalesce(cast("user@id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_column_names_conflicts_hashid, - tmp.* -from __dbt__cte__multiple_column_names_conflicts_ab2 tmp --- multiple_column_names_conflicts -where 1 = 1 - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index b8200f8bf6791..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('hkd@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, - 
{{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 420c7c9869752..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_bigint() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('hkd@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('hkd@spéçiäl & characters') }}, - cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_float() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index b716e29bdf6ef..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,177 +0,0 @@ -{{ config( - sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization_bhhpj", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. 
- Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization_bhhpj.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from 
new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - date as _airbyte_start_at, - lag(date) over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 8f8fd8c8e9bc7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,28 +0,0 @@ -{{ config( - sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], - unique_key = "_airbyte_unique_key", - schema = "test_normalization_bhhpj", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ 
source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index a66a0b168c2e4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "test_normalization_bhhpj", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index db45cc80a67aa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'date', - 'timestamp_col', - adapter.quote('hkd@spéçiäl & characters'), - 'hkd_special___characters', - 'nzd', - 'usd', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index 6aa768851a80c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: test_normalization_bhhpj - quoting: - database: true - 
schema: false - identifier: false - tables: - - name: _airbyte_raw_1_prefix_startwith_number - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_multiple_column_names_conflicts - - name: _airbyte_raw_pos_dedup_cdcx - - name: _airbyte_raw_renamed_dedup_cdc_excluded - - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index cfb1d029d88ff..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, - {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('hkd@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} as table_alias --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 2a9275c69a1ec..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ 
ref('dedup_exchange_rate_ab1') }} -select - cast(id as {{ dbt_utils.type_float() }}) as id, - cast(currency as {{ dbt_utils.type_string() }}) as currency, - cast(new_column as {{ dbt_utils.type_float() }}) as new_column, - cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, - cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, - cast({{ adapter.quote('hkd@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('hkd@spéçiäl & characters') }}, - cast(nzd as {{ dbt_utils.type_float() }}) as nzd, - cast(usd as {{ dbt_utils.type_bigint() }}) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at -from {{ ref('dedup_exchange_rate_ab1') }} --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index 9f8c382ff834b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,177 +0,0 @@ -{{ config( - sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization_bhhpj", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select recent_records.unique_key - from ( - select distinct _airbyte_unique_key as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ) recent_records - left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _airbyte_test_normalization_bhhpj.dedup_exchange_rate_stg"], - tags = [ "top-level" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at', this) }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data - union all - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, - date as _airbyte_start_at, - lag(date) over ( - partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) as _airbyte_end_at, - case when row_number() over ( - partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ) = 1 then 1 else 0 end as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data -), 
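-- For reference: the scd_data CTE above is the standard Type 2 SCD pattern, built
-- from two window functions over the primary key. A minimal sketch of the same
-- pattern, assuming an illustrative table raw_rates(id, rate, updated_at,
-- emitted_at) that is not part of the generated models:
select
    id,
    rate,
    updated_at as start_at,
    -- under the newest-first sort, lag() returns the next-newer version's start
    -- date, which serves as this version's end date (null for the newest row)
    lag(updated_at) over (
        partition by id
        order by updated_at is null asc, updated_at desc, emitted_at desc
    ) as end_at,
    -- the first row per key under the same ordering is the one active version
    case when row_number() over (
        partition by id
        order by updated_at is null asc, updated_at desc, emitted_at desc
    ) = 1 then 1 else 0 end as active_row
from raw_rates;
-- The dedup_data CTE below then keeps one row per (unique_key, start_at,
-- emitted_at) via a further row_number(), so re-synced copies of the same record
-- do not create duplicate SCD entries.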
-dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _airbyte_unique_key, - _airbyte_start_at, - _airbyte_emitted_at - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index c5fed3b30237f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,28 +0,0 @@ -{{ config( - sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], - unique_key = "_airbyte_unique_key", - schema = "test_normalization_bhhpj", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('dedup_exchange_rate_scd') }} -select - _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_dedup_exchange_rate_hashid -from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 9a7a498cc3754..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "test_normalization_bhhpj", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') 
}}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 9d10a9ea94901..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('dedup_exchange_rate_ab2') }} -select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'new_column', - 'date', - 'timestamp_col', - adapter.quote('hkd@spéçiäl & characters'), - 'nzd', - 'usd', - ]) }} as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from {{ ref('dedup_exchange_rate_ab2') }} tmp --- dedup_exchange_rate -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml deleted file mode 100644 index 4daf898b3002b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 2 -sources: -- name: test_normalization_bhhpj - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_cdc_excluded - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted file mode 100644 index de775a2e5c164..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_exchange_rate_scd__dbt_tmp" - ); - - - insert into 
"integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 372889fb42bda..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_exchange_rate__dbt_tmp" - ); - - - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index e2bd3830cb423..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,92 +0,0 @@ - - - create table - "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" - - - compound sortkey(_airbyte_emitted_at) - - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when 
_airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, - case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, - case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, - case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, - case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, - case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, - case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as text) as currency, - cast(nullif(date::varchar, '') as - date -) as date, - cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone -) as timestamp_col, - cast("hkd@spéçiäl & characters" as - float -) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 
index 903a3141f6256..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,66 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, - case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, - case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, - case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, - case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, - case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - bigint -) as id, - cast(currency as text) as currency, - cast(nullif(date::varchar, '') as - date -) as date, - cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone -) as timestamp_col, - cast("hkd@spéçiäl & characters" as - float -) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql deleted 
file mode 100644 index a193db25eb236..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" - where (_airbyte_unique_key_scd) in ( - select (_airbyte_unique_key_scd) - from "dedup_exchange_rate_scd__dbt_tmp" - ); - - - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate_scd__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql deleted file mode 100644 index 6afa610cc7215..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" - where (_airbyte_unique_key) in ( - select (_airbyte_unique_key) - from "dedup_exchange_rate__dbt_tmp" - ); - - - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - ( - select "_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from "dedup_exchange_rate__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 031baa2a7efbe..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,94 +0,0 @@ - - - create table - 
"integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" - - - compound sortkey(_airbyte_emitted_at) - - as ( - -with __dbt__cte__exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, - case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, - case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, - case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, - case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, - case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, - case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__exchange_rate_ab1 -select - cast(id as - float -) as id, - cast(currency as text) as currency, - cast(new_column as - float -) as new_column, - cast(nullif(date::varchar, '') as - date -) as date, - cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone -) as timestamp_col, - cast("hkd@spéçiäl & characters" as - float -) as "hkd@spéçiäl & characters", - cast(nzd as - float -) as nzd, - cast(usd as - float -) as usd, - cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__exchange_rate_ab1 --- exchange_rate -where 1 = 1 -), __dbt__cte__exchange_rate_ab3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__exchange_rate_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, - tmp.* -from __dbt__cte__exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__exchange_rate_ab3 -select - id, - currency, - new_column, - date, - timestamp_col, - "hkd@spéçiäl & characters", - nzd, - usd, - "column`_'with""_quotes", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate -where 1 = 1 - ); \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql deleted file mode 100644 index 8c9d36dd07d19..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ /dev/null @@ -1,68 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( - -with __dbt__cte__dedup_exchange_rate_ab1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate -select - case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, - case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, - case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, - case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, - case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, - case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, - case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - -), __dbt__cte__dedup_exchange_rate_ab2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__dedup_exchange_rate_ab1 -select - cast(id as - float -) as id, - cast(currency as text) as currency, - cast(new_column as - float -) as new_column, - cast(nullif(date::varchar, '') as - date -) as date, - cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone -) as timestamp_col, - cast("hkd@spéçiäl & characters" as - float -) as "hkd@spéçiäl & characters", - cast(nzd as - float -) as nzd, - cast(usd as - bigint -) as usd, - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at -from __dbt__cte__dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__dedup_exchange_rate_ab2 -select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, - tmp.* -from __dbt__cte__dedup_exchange_rate_ab2 tmp --- dedup_exchange_rate -where 1 = 1 - - ) ; diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml deleted file mode 100644 index 8a64d6b8085ff..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml +++ /dev/null @@ -1,126 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - +transient: false - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES - NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES - NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES - NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES - SOME_STREAM_THAT_WAS_EMPTY_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - SOME_STREAM_THAT_WAS_EMPTY_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - SOME_STREAM_THAT_WAS_EMPTY_STG: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - SOME_STREAM_THAT_WAS_EMPTY_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - SOME_STREAM_THAT_WAS_EMPTY: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES - 
SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES - SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB3: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES - SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES - CONFLICT_STREAM_NAME_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_SCALAR_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR - CONFLICT_STREAM_SCALAR_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR - CONFLICT_STREAM_SCALAR_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR - CONFLICT_STREAM_SCALAR: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR - CONFLICT_STREAM_ARRAY_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY - CONFLICT_STREAM_ARRAY_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY - CONFLICT_STREAM_ARRAY_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY - CONFLICT_STREAM_ARRAY: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY - UNNEST_ALIAS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - ARRAYS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - ARRAYS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - ARRAYS_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - ARRAYS: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - UNNEST_ALIAS_CHILDREN_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - ARRAYS_NESTED_ARRAY_PARENT_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - ARRAYS_NESTED_ARRAY_PARENT_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - ARRAYS_NESTED_ARRAY_PARENT_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - ARRAYS_NESTED_ARRAY_PARENT: 
TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME - UNNEST_ALIAS_CHILDREN_OWNER_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS - UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql deleted file mode 100644 index 8a87924032e44..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - create or replace table 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" as - (select * from( - --- Final base SQL model --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" -select - _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" --- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES -where 1 = 1 -and _AIRBYTE_ACTIVE_ROW = 1 - - ) order by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" cluster by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql deleted file mode 100644 index 2695e3388ca1d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql +++ /dev/null @@ -1,72 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as - (select * from( - -with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" -select - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, - get_path(parse_json(PARTITION), '"double_array_data"') as DOUBLE_ARRAY_DATA, - get_path(parse_json(PARTITION), '"DATA"') as DATA, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as table_alias --- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and PARTITION is not null - -), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 -select - 
_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, - DOUBLE_ARRAY_DATA, - DATA, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 --- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2 -select - md5(cast(coalesce(cast(_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID as - varchar -), '') || '-' || coalesce(cast(DOUBLE_ARRAY_DATA as - varchar -), '') || '-' || coalesce(cast(DATA as - varchar -), '') as - varchar -)) as _AIRBYTE_PARTITION_HASHID, - tmp.* -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2 tmp --- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3 -select - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, - DOUBLE_ARRAY_DATA, - DATA, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_PARTITION_HASHID -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3 --- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" -where 1 = 1 - - ) order by (_AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql deleted file mode 100644 index 436ec4cd9d191..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" as - (select * from( - -with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" - -select - _AIRBYTE_PARTITION_HASHID, - to_varchar(get_path(parse_json(DATA.value), '"currency"')) as CURRENCY, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -cross join table(flatten(DATA)) as DATA -where 1 = 1 -and DATA is not null - -), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 -select - _AIRBYTE_PARTITION_HASHID, - cast(CURRENCY as - varchar -) as CURRENCY, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2 -select - md5(cast(coalesce(cast(_AIRBYTE_PARTITION_HASHID as - varchar -), '') || '-' || coalesce(cast(CURRENCY as - varchar -), '') as - varchar -)) as _AIRBYTE_DATA_HASHID, - tmp.* -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2 tmp --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3 -select - _AIRBYTE_PARTITION_HASHID, - CURRENCY, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DATA_HASHID -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3 --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" -where 1 = 1 - - ) order by (_AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql deleted file mode 100644 index c5a250dd0bc3f..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql +++ /dev/null @@ -1,71 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" as - (select * from( - -with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" - -select - _AIRBYTE_PARTITION_HASHID, - to_varchar(get_path(parse_json(DOUBLE_ARRAY_DATA.value), '"id"')) as ID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as table_alias --- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -cross join table(flatten(DOUBLE_ARRAY_DATA)) as DOUBLE_ARRAY_DATA -where 1 = 1 -and DOUBLE_ARRAY_DATA is not null - -), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 -select - _AIRBYTE_PARTITION_HASHID, - cast(ID as - varchar -) as ID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 --- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2 -select - md5(cast(coalesce(cast(_AIRBYTE_PARTITION_HASHID as - varchar -), '') || '-' || coalesce(cast(ID as - varchar -), '') as - varchar -)) as _AIRBYTE_DOUBLE_ARRAY_DATA_HASHID, - tmp.* -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2 tmp --- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -where 1 = 1 - -)-- Final base SQL model --- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3 -select - _AIRBYTE_PARTITION_HASHID, - ID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DOUBLE_ARRAY_DATA_HASHID -from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3 --- DOUBLE_ARRAY_DATA at 
nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" -where 1 = 1 - - ) order by (_AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql deleted file mode 100644 index b3072ce0004ca..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ /dev/null @@ -1,85 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as - (select * from( - --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') -with - -input_data as ( - select * - from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG" - -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast(ID as - varchar -), '') as - varchar -)) as _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, - DATE as _AIRBYTE_START_AT, - lag(DATE) over ( - partition by ID - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) as _AIRBYTE_END_AT, - case when row_number() over ( - partition by ID - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_START_AT, - _AIRBYTE_EMITTED_AT - order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID - ) as _AIRBYTE_ROW_NUM, - md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as - varchar -), '') || '-' || coalesce(cast(_AIRBYTE_START_AT as - varchar -), '') || '-' || coalesce(cast(_AIRBYTE_EMITTED_AT as - varchar -), '') as - varchar -)) as _AIRBYTE_UNIQUE_KEY_SCD, - scd_data.* - from scd_data -) -select - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_UNIQUE_KEY_SCD, - ID, - DATE, - PARTITION, - _AIRBYTE_START_AT, - _AIRBYTE_END_AT, - _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as 
_AIRBYTE_NORMALIZED_AT, - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID -from dedup_data where _AIRBYTE_ROW_NUM = 1 - ) order by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" cluster by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql deleted file mode 100644 index 772f1976f2c6d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as ID, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as DATE, - {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as PARTITION, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} as table_alias --- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql deleted file mode 100644 index fd49a8524a645..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1') }} -select - cast(ID as {{ 
dbt_utils.type_string() }}) as ID, - cast(DATE as {{ dbt_utils.type_string() }}) as DATE, - cast(PARTITION as {{ type_json() }}) as PARTITION, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1') }} --- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql deleted file mode 100644 index e6c344e6308d2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} -select - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, - {{ json_extract_array('PARTITION', ['double_array_data'], ['double_array_data']) }} as DOUBLE_ARRAY_DATA, - {{ json_extract_array('PARTITION', ['DATA'], ['DATA']) }} as DATA, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} as table_alias --- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition -where 1 = 1 -and PARTITION is not null -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql deleted file mode 100644 index 050da953efddd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') 
}} -{{ unnest_cte(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION'), 'PARTITION', 'DATA') }} -select - _AIRBYTE_PARTITION_HASHID, - {{ json_extract_scalar(unnested_column_value('DATA'), ['currency'], ['currency']) }} as CURRENCY, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} as table_alias --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA -{{ cross_join_unnest('PARTITION', 'DATA') }} -where 1 = 1 -and DATA is not null -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql deleted file mode 100644 index 13b208068c10a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "nested-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} -{{ unnest_cte(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION'), 'PARTITION', 'DOUBLE_ARRAY_DATA') }} -select - _AIRBYTE_PARTITION_HASHID, - {{ json_extract_scalar(unnested_column_value('DOUBLE_ARRAY_DATA'), ['id'], ['id']) }} as ID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} as table_alias --- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data -{{ cross_join_unnest('PARTITION', 'DOUBLE_ARRAY_DATA') }} -where 1 = 1 -and DOUBLE_ARRAY_DATA is not null -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql deleted file mode 100644 index 110c17ef216dc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ config( - 
cluster_by = ["_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_EMITTED_AT"], - unique_key = "_AIRBYTE_UNIQUE_KEY", - schema = "TEST_NORMALIZATION", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} -select - _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} --- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} -where 1 = 1 -and _AIRBYTE_ACTIVE_ROW = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql deleted file mode 100644 index 3dda7efc9c613..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - schema = "TEST_NORMALIZATION", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3') }} -select - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, - DOUBLE_ARRAY_DATA, - DATA, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_PARTITION_HASHID -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3') }} --- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql deleted file mode 100644 index 526c8b658f19c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - schema = "TEST_NORMALIZATION", - tags = [ "nested" ] -) 
}} --- Final base SQL model --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3') }} -select - _AIRBYTE_PARTITION_HASHID, - CURRENCY, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DATA_HASHID -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3') }} --- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql deleted file mode 100644 index c46547e9a6242..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - schema = "TEST_NORMALIZATION", - tags = [ "nested" ] -) }} --- Final base SQL model --- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3') }} -select - _AIRBYTE_PARTITION_HASHID, - ID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DOUBLE_ARRAY_DATA_HASHID -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3') }} --- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql deleted file mode 100644 index 7b46e390d0575..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ /dev/null @@ -1,163 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], - unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", - schema = "TEST_NORMALIZATION", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - 
identifier='NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select recent_records.unique_key - from ( - select distinct _AIRBYTE_UNIQUE_KEY as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - ) recent_records - left join ( - select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - group by _AIRBYTE_UNIQUE_KEY - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], - tags = [ "top-level" ] -) }} --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} - -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} - where 1 = 1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} -), -new_data_ids as ( - -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'ID', - ]) }} as _AIRBYTE_UNIQUE_KEY - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._AIRBYTE_UNIQUE_KEY = new_data_ids._AIRBYTE_UNIQUE_KEY - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._AIRBYTE_AB_ID = inc_data._AIRBYTE_AB_ID - where _AIRBYTE_ACTIVE_ROW = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from new_data - union all - select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} - -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'ID', - ]) }} as _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, - DATE as _AIRBYTE_START_AT, - lag(DATE) over ( - partition by ID - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) as _AIRBYTE_END_AT, - case when row_number() over ( - partition by ID - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- 
additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_START_AT, - _AIRBYTE_EMITTED_AT - order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID - ) as _AIRBYTE_ROW_NUM, - {{ dbt_utils.surrogate_key([ - '_AIRBYTE_UNIQUE_KEY', - '_AIRBYTE_START_AT', - '_AIRBYTE_EMITTED_AT' - ]) }} as _AIRBYTE_UNIQUE_KEY_SCD, - scd_data.* - from scd_data -) -select - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_UNIQUE_KEY_SCD, - ID, - DATE, - PARTITION, - _AIRBYTE_START_AT, - _AIRBYTE_END_AT, - _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID -from dedup_data where _AIRBYTE_ROW_NUM = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/sources.yml deleted file mode 100644 index b51dbe4cce7a1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/sources.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 -sources: -- name: TEST_NORMALIZATION - quoting: - database: true - schema: false - identifier: false - tables: - - name: _AIRBYTE_RAW_ARRAYS - - name: _AIRBYTE_RAW_CONFLICT_STREAM_ARRAY - - name: _AIRBYTE_RAW_CONFLICT_STREAM_NAME - - name: _AIRBYTE_RAW_CONFLICT_STREAM_SCALAR - - name: _AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - - name: _AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES - - name: _AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - - name: _AIRBYTE_RAW_UNNEST_ALIAS -- name: TEST_NORMALIZATION_NAMESPACE - quoting: - database: true - schema: false - identifier: false - tables: - - name: _AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql deleted file mode 100644 index ce844e3777eff..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql +++ /dev/null @@ -1,26 +0,0 @@ -begin; - - - - - - - - merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" as DBT_INTERNAL_DEST - using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES__dbt_tmp" as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY - - - - when matched then update set - "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","ID" = DBT_INTERNAL_SOURCE."ID","DATE" = DBT_INTERNAL_SOURCE."DATE","PARTITION" = DBT_INTERNAL_SOURCE."PARTITION","_AIRBYTE_AB_ID" = 
DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" - - - when not matched then insert - ("_AIRBYTE_UNIQUE_KEY", "ID", "DATE", "PARTITION", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") - values - ("_AIRBYTE_UNIQUE_KEY", "ID", "DATE", "PARTITION", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") - -; - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql deleted file mode 100644 index 8be85b4920f8e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql +++ /dev/null @@ -1,9 +0,0 @@ -begin; - - - insert into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" ("_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID", "DOUBLE_ARRAY_DATA", "DATA", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_PARTITION_HASHID") - ( - select "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID", "DOUBLE_ARRAY_DATA", "DATA", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_PARTITION_HASHID" - from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION__dbt_tmp" - ); - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql deleted file mode 100644 index abd722a837d67..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql +++ /dev/null @@ -1,9 +0,0 @@ -begin; - - - insert into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" ("_AIRBYTE_PARTITION_HASHID", "CURRENCY", "_AIRBYTE_AB_ID", 
"_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DATA_HASHID") - ( - select "_AIRBYTE_PARTITION_HASHID", "CURRENCY", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DATA_HASHID" - from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA__dbt_tmp" - ); - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql deleted file mode 100644 index 11746e9f32afd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql +++ /dev/null @@ -1,9 +0,0 @@ -begin; - - - insert into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" ("_AIRBYTE_PARTITION_HASHID", "ID", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DOUBLE_ARRAY_DATA_HASHID") - ( - select "_AIRBYTE_PARTITION_HASHID", "ID", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DOUBLE_ARRAY_DATA_HASHID" - from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA__dbt_tmp" - ); - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql deleted file mode 100644 index 308aaf13f9081..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ /dev/null @@ -1,26 +0,0 @@ -begin; - - - - - - - - merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as DBT_INTERNAL_DEST - using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD__dbt_tmp" as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY_SCD = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY_SCD - - - - when matched then update set - "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","_AIRBYTE_UNIQUE_KEY_SCD" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY_SCD","ID" = DBT_INTERNAL_SOURCE."ID","DATE" = DBT_INTERNAL_SOURCE."DATE","PARTITION" = 
DBT_INTERNAL_SOURCE."PARTITION","_AIRBYTE_START_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_START_AT","_AIRBYTE_END_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_END_AT","_AIRBYTE_ACTIVE_ROW" = DBT_INTERNAL_SOURCE."_AIRBYTE_ACTIVE_ROW","_AIRBYTE_AB_ID" = DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" - - - when not matched then insert - ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "DATE", "PARTITION", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") - values - ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "DATE", "PARTITION", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") - -; - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml deleted file mode 100644 index 2b466206f0839..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml +++ /dev/null @@ -1,91 +0,0 @@ -name: airbyte_utils -version: '1.0' -config-version: 2 -profile: normalize -model-paths: -- models -docs-paths: -- docs -analysis-paths: -- analysis -test-paths: -- tests -seed-paths: -- data -macro-paths: -- macros -target-path: ../build -log-path: ../logs -packages-install-path: /dbt -clean-targets: -- build -- dbt_modules -quoting: - database: true - schema: false - identifier: true -models: - +transient: false - airbyte_utils: - +materialized: table - generated: - airbyte_ctes: - +tags: airbyte_internal_cte - +materialized: ephemeral - airbyte_incremental: - +tags: incremental_tables - +materialized: incremental - +on_schema_change: sync_all_columns - airbyte_tables: - +tags: normalized_tables - +materialized: table - airbyte_views: - +tags: airbyte_internal_views - +materialized: view -dispatch: -- macro_namespace: dbt_utils - search_order: - - airbyte_utils - - dbt_utils -vars: - json_column: _airbyte_data - models_to_source: - EXCHANGE_RATE_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE - EXCHANGE_RATE_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE - EXCHANGE_RATE_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE - EXCHANGE_RATE: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE - DEDUP_EXCHANGE_RATE_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - DEDUP_EXCHANGE_RATE_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - DEDUP_EXCHANGE_RATE_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - DEDUP_EXCHANGE_RATE_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - DEDUP_EXCHANGE_RATE: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - RENAMED_DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - RENAMED_DEDUP_CDC_EXCLUDED_AB2: 
TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - RENAMED_DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - RENAMED_DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - RENAMED_DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - DEDUP_CDC_EXCLUDED_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - POS_DEDUP_CDCX_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX - POS_DEDUP_CDCX_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX - POS_DEDUP_CDCX_STG: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX - POS_DEDUP_CDCX_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX - POS_DEDUP_CDCX: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX - 1_prefix_startwith_number_ab1: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - 1_prefix_startwith_number_ab2: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - 1_prefix_startwith_number_stg: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - 1_prefix_startwith_number_scd: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - 1_prefix_startwith_number: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - MULTIPLE_COLUMN_NAMES_CONFLICTS_STG: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - MULTIPLE_COLUMN_NAMES_CONFLICTS_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - MULTIPLE_COLUMN_NAMES_CONFLICTS: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - TYPES_TESTING_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING - TYPES_TESTING_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING - TYPES_TESTING_STG: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING - TYPES_TESTING_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING - TYPES_TESTING: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql deleted file mode 100644 index 7efd7f3244dbc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" as - (select * from( - --- Final base SQL model --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" -select - _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as 
_AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" --- DEDUP_EXCHANGE_RATE from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE -where 1 = 1 -and _AIRBYTE_ACTIVE_ROW = 1 - - ) order by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" cluster by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql deleted file mode 100644 index 220cd093da41e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ /dev/null @@ -1,103 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as - (select * from( - --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') -with - -input_data as ( - select * - from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" - -- DEDUP_EXCHANGE_RATE from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE -), - -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - md5(cast(coalesce(cast(ID as - varchar -), '') || '-' || coalesce(cast(CURRENCY as - varchar -), '') || '-' || coalesce(cast(NZD as - varchar -), '') as - varchar -)) as _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - DATE as _AIRBYTE_START_AT, - lag(DATE) over ( - partition by ID, CURRENCY, cast(NZD as - varchar -) - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) as _AIRBYTE_END_AT, - case when row_number() over ( - partition by ID, CURRENCY, cast(NZD as - varchar -) - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_START_AT, - _AIRBYTE_EMITTED_AT - order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID - ) as _AIRBYTE_ROW_NUM, - md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as - varchar -), '') || '-' || coalesce(cast(_AIRBYTE_START_AT as - varchar -), '') || '-' || coalesce(cast(_AIRBYTE_EMITTED_AT as - varchar -), '') as - varchar -)) as _AIRBYTE_UNIQUE_KEY_SCD, - scd_data.* - from scd_data -) -select - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_UNIQUE_KEY_SCD, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - _AIRBYTE_START_AT, - _AIRBYTE_END_AT, - _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', 
current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID -from dedup_data where _AIRBYTE_ROW_NUM = 1 - ) order by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" cluster by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql deleted file mode 100644 index e35addfdeb762..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ /dev/null @@ -1,159 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" as - (select * from( - -with __dbt__cte__EXCHANGE_RATE_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE -select - to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, - to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, - to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, - to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, - to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", - to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, - to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, - to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, - to_varchar(get_path(parse_json(_airbyte_data), '"column`_''with""_quotes"')) as "column`_'with""_quotes", - to_varchar(get_path(parse_json(_airbyte_data), '"datetime_tz"')) as DATETIME_TZ, - to_varchar(get_path(parse_json(_airbyte_data), '"datetime_no_tz"')) as DATETIME_NO_TZ, - to_varchar(get_path(parse_json(_airbyte_data), '"time_tz"')) as TIME_TZ, - to_varchar(get_path(parse_json(_airbyte_data), '"time_no_tz"')) as TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE as table_alias --- EXCHANGE_RATE -where 1 = 1 -), __dbt__cte__EXCHANGE_RATE_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__EXCHANGE_RATE_AB1 -select - cast(ID as - bigint -) as ID, - cast(CURRENCY as - varchar -) as CURRENCY, - cast(nullif(DATE, '') as - date -) as DATE, - case - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then 
to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when TIMESTAMP_COL = '' then NULL - else to_timestamp_tz(TIMESTAMP_COL) - end as TIMESTAMP_COL - , - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(HKD_SPECIAL___CHARACTERS as - varchar -) as HKD_SPECIAL___CHARACTERS, - cast(NZD as - float -) as NZD, - cast(USD as - float -) as USD, - cast("column`_'with""_quotes" as - varchar -) as "column`_'with""_quotes", - case - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZH') - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when DATETIME_TZ = '' then NULL - else to_timestamp_tz(DATETIME_TZ) - end as DATETIME_TZ - , - case - when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS') - when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS.FF') - when DATETIME_NO_TZ = '' then NULL - else to_timestamp(DATETIME_NO_TZ) - end as DATETIME_NO_TZ - , - cast(nullif(TIME_TZ, '') as - varchar -) as TIME_TZ, - cast(nullif(TIME_NO_TZ, '') as - time -) as TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__EXCHANGE_RATE_AB1 --- EXCHANGE_RATE -where 1 = 1 -), __dbt__cte__EXCHANGE_RATE_AB3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__EXCHANGE_RATE_AB2 -select - md5(cast(coalesce(cast(ID as - varchar -), '') || '-' || coalesce(cast(CURRENCY as - varchar -), '') || '-' || coalesce(cast(DATE as - varchar -), '') || '-' || coalesce(cast(TIMESTAMP_COL as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as - varchar -), '') || '-' || coalesce(cast(NZD as - varchar -), '') || '-' || coalesce(cast(USD as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') || '-' || coalesce(cast(DATETIME_TZ as - varchar -), '') || '-' || coalesce(cast(DATETIME_NO_TZ as - varchar -), '') || '-' || coalesce(cast(TIME_TZ as - varchar -), '') || '-' || coalesce(cast(TIME_NO_TZ as - varchar -), '') as - varchar -)) as _AIRBYTE_EXCHANGE_RATE_HASHID, - tmp.* -from __dbt__cte__EXCHANGE_RATE_AB2 tmp --- EXCHANGE_RATE -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__EXCHANGE_RATE_AB3 -select - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - "column`_'with""_quotes", - DATETIME_TZ, - DATETIME_NO_TZ, - TIME_TZ, - TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_EXCHANGE_RATE_HASHID -from __dbt__cte__EXCHANGE_RATE_AB3 --- EXCHANGE_RATE from 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE -where 1 = 1 - ) order by (_AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql deleted file mode 100644 index e91864477ee70..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql +++ /dev/null @@ -1,95 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" - - as ( - -with __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE -select - to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, - to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, - to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, - to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, - to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", - to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, - to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, - to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE as table_alias --- DEDUP_EXCHANGE_RATE -where 1 = 1 - -), __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 -select - cast(ID as - bigint -) as ID, - cast(CURRENCY as - varchar -) as CURRENCY, - cast(nullif(DATE, '') as - date -) as DATE, - case - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when TIMESTAMP_COL = '' then NULL - else to_timestamp_tz(TIMESTAMP_COL) - end as TIMESTAMP_COL - , - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(HKD_SPECIAL___CHARACTERS as - varchar -) as HKD_SPECIAL___CHARACTERS, - cast(NZD as - float -) as NZD, - cast(USD as 
- float -) as USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 --- DEDUP_EXCHANGE_RATE -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 -select - md5(cast(coalesce(cast(ID as - varchar -), '') || '-' || coalesce(cast(CURRENCY as - varchar -), '') || '-' || coalesce(cast(DATE as - varchar -), '') || '-' || coalesce(cast(TIMESTAMP_COL as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as - varchar -), '') || '-' || coalesce(cast(NZD as - varchar -), '') || '-' || coalesce(cast(USD as - varchar -), '') as - varchar -)) as _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID, - tmp.* -from __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 tmp --- DEDUP_EXCHANGE_RATE -where 1 = 1 - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql deleted file mode 100644 index 639671b74a4b4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql +++ /dev/null @@ -1,83 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."MULTIPLE_COLUMN_NAMES_CONFLICTS_STG" - - as ( - -with __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS -select - to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, - to_varchar(get_path(parse_json(_airbyte_data), '"User Id"')) as "User Id", - to_varchar(get_path(parse_json(_airbyte_data), '"user_id"')) as USER_ID, - to_varchar(get_path(parse_json(_airbyte_data), '"User id"')) as "User id", - to_varchar(get_path(parse_json(_airbyte_data), '"user id"')) as "user id", - to_varchar(get_path(parse_json(_airbyte_data), '"User@Id"')) as "User@Id", - to_varchar(get_path(parse_json(_airbyte_data), '"UserId"')) as USERID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS as table_alias --- MULTIPLE_COLUMN_NAMES_CONFLICTS -where 1 = 1 - -), __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 -select - cast(ID as - bigint -) as ID, - cast("User Id" as - varchar -) as "User Id", - cast(USER_ID as - float -) as USER_ID, - cast("User id" as - float -) as "User id", - cast("user id" as - float -) as "user id", - cast("User@Id" as - varchar -) as "User@Id", - cast(USERID as - float -) as USERID, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as 
_AIRBYTE_NORMALIZED_AT -from __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 --- MULTIPLE_COLUMN_NAMES_CONFLICTS -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 -select - md5(cast(coalesce(cast(ID as - varchar -), '') || '-' || coalesce(cast("User Id" as - varchar -), '') || '-' || coalesce(cast(USER_ID as - varchar -), '') || '-' || coalesce(cast("User id" as - varchar -), '') || '-' || coalesce(cast("user id" as - varchar -), '') || '-' || coalesce(cast("User@Id" as - varchar -), '') || '-' || coalesce(cast(USERID as - varchar -), '') as - varchar -)) as _AIRBYTE_MULTIPLE_COLUMN_NAMES_CONFLICTS_HASHID, - tmp.* -from __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 tmp --- MULTIPLE_COLUMN_NAMES_CONFLICTS -where 1 = 1 - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql deleted file mode 100644 index 06be4a0eaa2fb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} -select - {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as ID, - {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as CURRENCY, - {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as DATE, - {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as TIMESTAMP_COL, - {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, - {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_SPECIAL___CHARACTERS, - {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, - {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} as table_alias --- DEDUP_EXCHANGE_RATE -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql deleted file mode 100644 index f3a40af778cc4..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql +++ /dev/null @@ -1,33 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ ref('DEDUP_EXCHANGE_RATE_AB1') }} -select - cast(ID as {{ dbt_utils.type_bigint() }}) as ID, - cast(CURRENCY as {{ dbt_utils.type_string() }}) as CURRENCY, - cast({{ empty_string_to_null('DATE') }} as {{ type_date() }}) as DATE, - case - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when TIMESTAMP_COL = '' then NULL - else to_timestamp_tz(TIMESTAMP_COL) - end as TIMESTAMP_COL - , - cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, - cast(HKD_SPECIAL___CHARACTERS as {{ dbt_utils.type_string() }}) as HKD_SPECIAL___CHARACTERS, - cast(NZD as {{ dbt_utils.type_float() }}) as NZD, - cast(USD as {{ dbt_utils.type_float() }}) as USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT -from {{ ref('DEDUP_EXCHANGE_RATE_AB1') }} --- DEDUP_EXCHANGE_RATE -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql deleted file mode 100644 index 0663a8d251e46..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql +++ /dev/null @@ -1,28 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_EMITTED_AT"], - unique_key = "_AIRBYTE_UNIQUE_KEY", - schema = "TEST_NORMALIZATION", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('DEDUP_EXCHANGE_RATE_SCD') }} -select - _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID -from {{ ref('DEDUP_EXCHANGE_RATE_SCD') }} --- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} -where 1 = 1 -and _AIRBYTE_ACTIVE_ROW = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - 
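For orientation before the SCD model below: every staging view deleted above follows the same three-stage shape that legacy normalization generated per stream. An _AB1 CTE parses the raw _airbyte_data JSON blob into text columns, an _AB2 CTE casts those strings to the types declared in the stream's JSON schema, and a final select appends an md5 hash over all data columns as the record's hash id. The following minimal, hand-written Snowflake sketch shows that shape; the stream name my_stream, its two fields, and the test_schema location are illustrative assumptions, not names taken from the generated output.

with ab1 as (
    -- stage 1 (_AB1): extract each declared field from the raw JSON blob as text
    select
        to_varchar(get_path(parse_json(_airbyte_data), '"id"'))  as id,
        to_varchar(get_path(parse_json(_airbyte_data), '"usd"')) as usd,
        _airbyte_ab_id,
        _airbyte_emitted_at
    from test_schema._airbyte_raw_my_stream  -- hypothetical raw table
),
ab2 as (
    -- stage 2 (_AB2): cast the extracted strings to their JSON-schema types
    select
        cast(id as bigint) as id,
        cast(usd as float) as usd,
        _airbyte_ab_id,
        _airbyte_emitted_at
    from ab1
)
-- stage 3 (_STG): hash the coalesced, '-'-joined data columns, mirroring the
-- generated md5(cast(coalesce(...) || '-' || coalesce(...) as varchar)) expression
select
    md5(cast(
        coalesce(cast(id as varchar), '') || '-' ||
        coalesce(cast(usd as varchar), '')
    as varchar)) as _airbyte_my_stream_hashid,
    ab2.*
from ab2;

The hash id acts as a compact fingerprint of the record's values for the downstream dedup and SCD models, which is why every generated staging view ends with the same md5-over-coalesced-columns expression regardless of how wide the stream is.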
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql deleted file mode 100644 index 13f4936015110..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ /dev/null @@ -1,177 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], - unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", - schema = "TEST_NORMALIZATION", - post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='DEDUP_EXCHANGE_RATE' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. - delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select recent_records.unique_key - from ( - select distinct _AIRBYTE_UNIQUE_KEY as unique_key - from {{ this }} - where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} - ) recent_records - left join ( - select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} - group by _AIRBYTE_UNIQUE_KEY - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], - tags = [ "top-level" ] -) }} --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} - -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} - where 1 = 1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} -), -new_data_ids as ( - -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'NZD', - ]) }} as _AIRBYTE_UNIQUE_KEY - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('DEDUP_EXCHANGE_RATE_STG'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._AIRBYTE_UNIQUE_KEY = new_data_ids._AIRBYTE_UNIQUE_KEY - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data._AIRBYTE_AB_ID = inc_data._AIRBYTE_AB_ID - where _AIRBYTE_ACTIVE_ROW = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from new_data - union all - select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} - -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} -), -{% endif %} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'NZD', - ]) }} as _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - DATE as _AIRBYTE_START_AT, - lag(DATE) over ( - partition by ID, CURRENCY, cast(NZD as {{ dbt_utils.type_string() }}) - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) as _AIRBYTE_END_AT, - case when row_number() over ( - partition by ID, CURRENCY, cast(NZD as {{ dbt_utils.type_string() }}) - order by - DATE is null asc, - DATE desc, - _AIRBYTE_EMITTED_AT desc - ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID - from input_data -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_START_AT, - _AIRBYTE_EMITTED_AT - order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID - ) as _AIRBYTE_ROW_NUM, - {{ dbt_utils.surrogate_key([ - 
'_AIRBYTE_UNIQUE_KEY', - '_AIRBYTE_START_AT', - '_AIRBYTE_EMITTED_AT' - ]) }} as _AIRBYTE_UNIQUE_KEY_SCD, - scd_data.* - from scd_data -) -select - _AIRBYTE_UNIQUE_KEY, - _AIRBYTE_UNIQUE_KEY_SCD, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - _AIRBYTE_START_AT, - _AIRBYTE_END_AT, - _AIRBYTE_ACTIVE_ROW, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID -from dedup_data where _AIRBYTE_ROW_NUM = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql deleted file mode 100644 index 6b42adb3962da..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ /dev/null @@ -1,30 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "TEST_NORMALIZATION", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('EXCHANGE_RATE_AB3') }} -select - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - {{ adapter.quote('column`_\'with""_quotes') }}, - DATETIME_TZ, - DATETIME_NO_TZ, - TIME_TZ, - TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_EXCHANGE_RATE_HASHID -from {{ ref('EXCHANGE_RATE_AB3') }} --- EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_EXCHANGE_RATE') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql deleted file mode 100644 index d810a79652be6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql +++ /dev/null @@ -1,25 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- SQL model to build a hash column based on the values of this record --- depends_on: {{ ref('DEDUP_EXCHANGE_RATE_AB2') }} -select - {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'DATE', - 'TIMESTAMP_COL', - adapter.quote('HKD@spéçiäl & characters'), - 'HKD_SPECIAL___CHARACTERS', - 'NZD', - 'USD', - ]) }} as _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID, - tmp.* -from {{ ref('DEDUP_EXCHANGE_RATE_AB2') }} tmp --- DEDUP_EXCHANGE_RATE -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml deleted file mode 100644 index 2932fe914c6c3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 -sources: -- name: TEST_NORMALIZATION - quoting: - database: true - schema: false - identifier: false - tables: - - name: _AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - - name: _AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - - name: _AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - - name: _AIRBYTE_RAW_EXCHANGE_RATE - - name: _AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - - name: _AIRBYTE_RAW_POS_DEDUP_CDCX - - name: _AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - - name: _AIRBYTE_RAW_TYPES_TESTING diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql deleted file mode 100644 index 347a356730944..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql +++ /dev/null @@ -1,26 +0,0 @@ -begin; - - - - - - - - merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" as DBT_INTERNAL_DEST - using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE__dbt_tmp" as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY - - - - when matched then update set - "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","ID" = DBT_INTERNAL_SOURCE."ID","CURRENCY" = DBT_INTERNAL_SOURCE."CURRENCY","DATE" = DBT_INTERNAL_SOURCE."DATE","TIMESTAMP_COL" = DBT_INTERNAL_SOURCE."TIMESTAMP_COL","HKD@spéçiäl & characters" = DBT_INTERNAL_SOURCE."HKD@spéçiäl & characters","HKD_SPECIAL___CHARACTERS" = DBT_INTERNAL_SOURCE."HKD_SPECIAL___CHARACTERS","NZD" = DBT_INTERNAL_SOURCE."NZD","USD" = DBT_INTERNAL_SOURCE."USD","_AIRBYTE_AB_ID" = DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" - - - when not matched then insert - ("_AIRBYTE_UNIQUE_KEY", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") - values - ("_AIRBYTE_UNIQUE_KEY", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") - -; - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql deleted file mode 100644 index 7323186545749..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ /dev/null @@ -1,26 +0,0 @@ -begin; - - - - - - - - merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as DBT_INTERNAL_DEST - using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD__dbt_tmp" as DBT_INTERNAL_SOURCE - on - DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY_SCD = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY_SCD - - - - when matched then update set - "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","_AIRBYTE_UNIQUE_KEY_SCD" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY_SCD","ID" = DBT_INTERNAL_SOURCE."ID","CURRENCY" = DBT_INTERNAL_SOURCE."CURRENCY","DATE" = DBT_INTERNAL_SOURCE."DATE","TIMESTAMP_COL" = DBT_INTERNAL_SOURCE."TIMESTAMP_COL","HKD@spéçiäl & characters" = DBT_INTERNAL_SOURCE."HKD@spéçiäl & characters","HKD_SPECIAL___CHARACTERS" = DBT_INTERNAL_SOURCE."HKD_SPECIAL___CHARACTERS","NZD" = DBT_INTERNAL_SOURCE."NZD","USD" = DBT_INTERNAL_SOURCE."USD","_AIRBYTE_START_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_START_AT","_AIRBYTE_END_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_END_AT","_AIRBYTE_ACTIVE_ROW" = DBT_INTERNAL_SOURCE."_AIRBYTE_ACTIVE_ROW","_AIRBYTE_AB_ID" = DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" - - - when not matched then insert - ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") - values - ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") - -; - commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql deleted file mode 100644 index e35addfdeb762..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ /dev/null @@ -1,159 +0,0 @@ - - - create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" as - (select * from( - -with __dbt__cte__EXCHANGE_RATE_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and 
extract into separated field columns as described by the JSON Schema --- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE -select - to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, - to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, - to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, - to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, - to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", - to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, - to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, - to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, - to_varchar(get_path(parse_json(_airbyte_data), '"column`_''with""_quotes"')) as "column`_'with""_quotes", - to_varchar(get_path(parse_json(_airbyte_data), '"datetime_tz"')) as DATETIME_TZ, - to_varchar(get_path(parse_json(_airbyte_data), '"datetime_no_tz"')) as DATETIME_NO_TZ, - to_varchar(get_path(parse_json(_airbyte_data), '"time_tz"')) as TIME_TZ, - to_varchar(get_path(parse_json(_airbyte_data), '"time_no_tz"')) as TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE as table_alias --- EXCHANGE_RATE -where 1 = 1 -), __dbt__cte__EXCHANGE_RATE_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__EXCHANGE_RATE_AB1 -select - cast(ID as - bigint -) as ID, - cast(CURRENCY as - varchar -) as CURRENCY, - cast(nullif(DATE, '') as - date -) as DATE, - case - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when TIMESTAMP_COL = '' then NULL - else to_timestamp_tz(TIMESTAMP_COL) - end as TIMESTAMP_COL - , - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(HKD_SPECIAL___CHARACTERS as - varchar -) as HKD_SPECIAL___CHARACTERS, - cast(NZD as - float -) as NZD, - cast(USD as - float -) as USD, - cast("column`_'with""_quotes" as - varchar -) as "column`_'with""_quotes", - case - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZH') - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when DATETIME_TZ = '' then NULL - else to_timestamp_tz(DATETIME_TZ) - end as DATETIME_TZ - , 
- case - when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS') - when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS.FF') - when DATETIME_NO_TZ = '' then NULL - else to_timestamp(DATETIME_NO_TZ) - end as DATETIME_NO_TZ - , - cast(nullif(TIME_TZ, '') as - varchar -) as TIME_TZ, - cast(nullif(TIME_NO_TZ, '') as - time -) as TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__EXCHANGE_RATE_AB1 --- EXCHANGE_RATE -where 1 = 1 -), __dbt__cte__EXCHANGE_RATE_AB3 as ( - --- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__EXCHANGE_RATE_AB2 -select - md5(cast(coalesce(cast(ID as - varchar -), '') || '-' || coalesce(cast(CURRENCY as - varchar -), '') || '-' || coalesce(cast(DATE as - varchar -), '') || '-' || coalesce(cast(TIMESTAMP_COL as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as - varchar -), '') || '-' || coalesce(cast(NZD as - varchar -), '') || '-' || coalesce(cast(USD as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') || '-' || coalesce(cast(DATETIME_TZ as - varchar -), '') || '-' || coalesce(cast(DATETIME_NO_TZ as - varchar -), '') || '-' || coalesce(cast(TIME_TZ as - varchar -), '') || '-' || coalesce(cast(TIME_NO_TZ as - varchar -), '') as - varchar -)) as _AIRBYTE_EXCHANGE_RATE_HASHID, - tmp.* -from __dbt__cte__EXCHANGE_RATE_AB2 tmp --- EXCHANGE_RATE -where 1 = 1 -)-- Final base SQL model --- depends_on: __dbt__cte__EXCHANGE_RATE_AB3 -select - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, - "column`_'with""_quotes", - DATETIME_TZ, - DATETIME_NO_TZ, - TIME_TZ, - TIME_NO_TZ, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, - _AIRBYTE_EXCHANGE_RATE_HASHID -from __dbt__cte__EXCHANGE_RATE_AB3 --- EXCHANGE_RATE from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE -where 1 = 1 - ) order by (_AIRBYTE_EMITTED_AT) - ); - alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql deleted file mode 100644 index e91864477ee70..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql +++ /dev/null @@ -1,95 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" - - as ( - -with __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE -select - to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, - to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, - to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, - to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, - to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", - to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, - to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, - to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE as table_alias --- DEDUP_EXCHANGE_RATE -where 1 = 1 - -), __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 -select - cast(ID as - bigint -) as ID, - cast(CURRENCY as - varchar -) as CURRENCY, - cast(nullif(DATE, '') as - date -) as DATE, - case - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') - when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') - when TIMESTAMP_COL = '' then NULL - else to_timestamp_tz(TIMESTAMP_COL) - end as TIMESTAMP_COL - , - cast("HKD@spéçiäl & characters" as - float -) as "HKD@spéçiäl & characters", - cast(HKD_SPECIAL___CHARACTERS as - varchar -) as HKD_SPECIAL___CHARACTERS, - cast(NZD as - float -) as NZD, - cast(USD as - float -) as USD, - _AIRBYTE_AB_ID, - _AIRBYTE_EMITTED_AT, - convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT -from __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 --- DEDUP_EXCHANGE_RATE -where 1 = 1 - -)-- SQL model to build a hash column based on the values of this record --- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 -select - md5(cast(coalesce(cast(ID as - varchar -), '') || '-' || coalesce(cast(CURRENCY as - varchar -), '') || '-' || coalesce(cast(DATE as - varchar -), '') || '-' || coalesce(cast(TIMESTAMP_COL as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as - varchar -), '') || '-' || coalesce(cast(NZD as - varchar -), '') || '-' || coalesce(cast(USD as - varchar -), '') as - varchar -)) as _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID, - tmp.* -from __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 tmp --- DEDUP_EXCHANGE_RATE -where 1 = 1 - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/README.md b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/README.md deleted file mode 100644 index 470ec8ed70091..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# test_nested_streams - -The stream `nested_stream_with_complex_columns_resulting_into_long_names` tests primary key definition on a stream -with nested fields of several complex types: - -- nested object -- nested array -- nested array of array - -# Stream name collisions - -The following three streams are purposely given very long names to exceed Postgres's 64-character identifier limit: -(even when they are set in different schemas) - -- `test_normalization_nested_stream_with_complex_columns_resulting_into_long_names` -- `test_normalization_non_nested_stream_without_namespace_resulting_into_long_names` -- `test_normalization_namespace_simple_stream_with_namespace_resulting_into_long_names` - -all of which could be truncated to: - -- `test_normalization_n__lting_into_long_names` - -resulting in collisions. - -# Stream name conflicts - -The `conflict_stream_name_*` tables and `unnest_alias` test naming conflicts between stream and column names when combined with nesting diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json deleted file mode 100644 index 4e5105f136e09..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json +++ /dev/null @@ -1,311 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "nested_stream_with_complex_columns_resulting_into_long_names", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "date": { - "type": ["null", "string"] - }, - "partition": { - "type": ["null", "object"], - "properties": { - "double_array_data": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "properties": { - "id": { - "type": ["null", "string"] - } - } - } - } - }, - "DATA": { - "type": ["null", "array"], - "items": { - "properties": { - "currency": { - "type": ["null", "string"] - } - } - } - } - } - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "non_nested_stream_without_namespace_resulting_into_long_names", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "date": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - "primary_key": [] - }, - { - "stream": { - "name": "some_stream_that_was_empty", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "date": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": false, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": 
"simple_stream_with_namespace_resulting_into_long_names", - "namespace": "test_normalization_namespace", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "date": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "append", - "primary_key": [] - }, - { - "stream": { - "name": "conflict_stream_name", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "conflict_stream_name": { - "type": ["null", "object"], - "properties": { - "conflict_stream_name": { - "type": "object", - "items": { - "type": "object", - "properties": { - "groups": { - "type": "string" - } - }, - "custom_fields": { - "items": { - "properties": { - "id": { - "type": ["null", "integer"] - }, - "value": {} - }, - "type": ["null", "object"] - }, - "type": ["null", "array"] - }, - "conflict_stream_name": { - "type": "integer" - } - } - } - } - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - "primary_key": [] - }, - { - "stream": { - "name": "conflict_stream_scalar", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "conflict_stream_scalar": { - "type": "integer" - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - "primary_key": [] - }, - { - "stream": { - "name": "conflict_stream_array", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "number", "string"] - }, - "conflict_stream_array": { - "type": ["null", "array"], - "properties": { - "conflict_stream_name": { - "type": ["null", "array"], - "items": { - "properties": { - "id": { - "type": ["null", "integer"] - } - } - } - } - } - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite", - "primary_key": [] - }, - { - "stream": { - "name": "unnest_alias", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "children": { - "type": ["null", "array"], - "items": { - "type": "object", - "properties": { - "ab_id": { - "type": ["null", "integer"] - }, - "owner": { - "type": ["null", "object"], - "properties": { - "owner_id": { - "type": ["null", "integer"] - }, - "column`_'with\"_quotes": { - "type": ["null", "array"], - "items": { - "properties": { - "currency": { - "type": ["null", "string"] - } - } - } - } - } - } - } - } - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "arrays", - "json_schema": { - "type": ["null", "object"], - "properties": { - "array_of_strings": { - "type": ["null", "array"], - "items": { - "type": ["null", "string"] - } - }, - "nested_array_parent": { - "type": ["null", "object"], - "properties": { - "nested_array": { - "type": ["null", "array"], - "items": { - "type": 
["null", "string"] - } - } - } - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "cursor_field": [], - "destination_sync_mode": "overwrite", - "primary_key": [] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt deleted file mode 100644 index e349c09afc31b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt +++ /dev/null @@ -1,18 +0,0 @@ -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": 4.2, "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599100, "data": { "id": "test record", "date": "2020-08-31T00:00:00Z", "partition": { "double_array_data": [[ { "id": "USD" } ], [ { "id": "GBP" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} - -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":1,"conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":2,"conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":1, "conflict_stream_array": {"conflict_stream_array": [{"id": 1}, {"id": 2}, {"id": 3}]}}, "emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":2, "conflict_stream_array": {"conflict_stream_array": [{"id": 4}, {"id": 5}, {"id": 6}]}}, "emitted_at":1623861860}} - -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":1, "children": [{"ab_id": 1, "owner": {"owner_id": 1, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 2, "owner": {"owner_id": 2, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":2, "children": [{"ab_id": 3, "owner": {"owner_id": 3, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 4, "owner": {"owner_id": 4, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} 
-{"type":"RECORD","record":{"stream":"arrays","emitted_at":1602638599000,"data":{"array_of_strings":["string1",null,"string2","string3"],"nested_array_parent":{"nested_array":["string1",null,"string2"]}}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt deleted file mode 100644 index ae1cf0f5c0b4e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt +++ /dev/null @@ -1,22 +0,0 @@ -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": 4.2, "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599100, "data": { "id": "test record", "date": "2020-08-31T00:00:00Z", "partition": { "double_array_data": [[ { "id": "USD" } ], [ { "id": "GBP" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638600000, "data": { "id": "new record", "date": "2020-09-10T00:00:00Z", "partition": { "double_array_data": [[ { "id": "GBP" } ], [ { "id": "HKD" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} - -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":1,"conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":2,"conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":1, "conflict_stream_array": {"conflict_stream_array": [{"id": 1}, {"id": 2}, {"id": 3}]}}, "emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":2, "conflict_stream_array": {"conflict_stream_array": [{"id": 4}, {"id": 5}, {"id": 6}]}}, "emitted_at":1623861860}} - -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":1, "children": [{"ab_id": 1, "owner": {"owner_id": 1, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 2, "owner": {"owner_id": 2, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} 
-{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":2, "children": [{"ab_id": 3, "owner": {"owner_id": 3, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 4, "owner": {"owner_id": 4, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":1,"date": "2020-11-05"},"emitted_at":1623871660}} -{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":2,"date": "2020-11-06"},"emitted_at":1623872660}} -{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":3,"date": "2020-11-06"},"emitted_at":1623873660}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/replace_identifiers.json deleted file mode 100644 index 0c2197f2d759c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/replace_identifiers.json +++ /dev/null @@ -1,130 +0,0 @@ -{ - "bigquery": [ - { "double_array_data is not null": "array_length(double_array_data) > 0" }, - { "DATA is not null": "array_length(DATA) > 0" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "array_length(column___with__quotes) > 0" - } - ], - "oracle": [], - "postgres": [ - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_c__ion_double_array_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_c___names_partition_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_c___long_names_partition" - }, - { - "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_c__lting_into_long_names'" - }, - { - "'non_nested_stream_without_namespace_resulting_into_long_names'": "'non_nested_stream_wi__lting_into_long_names'" - }, - { - "expression: \"DATA is not null\"": "expression: \"\\\"DATA\\\" is not null\"" - } - ], - "snowflake": [ - { - "NESTED_STREAMS_FIRST_RUN_ROW_COUNTS": "nested_streams_first_run_row_counts" - }, - { - "NESTED_STREAMS_SECOND_RUN_ROW_COUNTS": "nested_streams_second_run_row_counts" - } - ], - "redshift": [], - "mysql": [ - { - "_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names": "_airbyte_raw_nested_s__lting_into_long_names" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" - }, - { - "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" - }, - { - "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" - }, - { - "double_array_data is not null": "coalesce(json_length(double_array_data), 0) > 0" - }, - { "DATA is not null": "coalesce(json_length(DATA), 0) > 0" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "coalesce(json_length(`column__'with\\\"_quotes`), 0) > 0" - } - ], - 
"mssql": [ - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" - }, - { - "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" - }, - { - "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" - } - ], - "tidb": [ - { - "_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names": "_airbyte_raw_nested_s__lting_into_long_names" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" - }, - { - "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" - }, - { - "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" - }, - { - "double_array_data is not null": "coalesce(json_length(double_array_data), 0) > 0" - }, - { "DATA is not null": "coalesce(json_length(DATA), 0) > 0" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "coalesce(json_length(`column__'with\\\"_quotes`), 0) > 0" - } - ], - "duckdb": [ - { - "_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names": "_airbyte_raw_nested_s__lting_into_long_names" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" - }, - { - "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" - }, - { - "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" - }, - { - "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql deleted file mode 100644 index 4764acc1d39a2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('nested_streams_first_run_row_counts') }} -where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql deleted file mode 100644 index 169bb80895e6a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('nested_streams_second_run_row_counts') }} -where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp/nested_streams_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp/nested_streams_first_run_row_counts.sql deleted file mode 100644 index 42c4d3c229846..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp/nested_streams_first_run_row_counts.sql +++ /dev/null @@ -1,30 +0,0 @@ -with table_row_counts as ( - select distinct '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 2 as expected_count - from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -union all - select distinct 'nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 2 as expected_count - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names') }} -union all - select distinct 'nested_stream_with_complex_columns_resulting_into_long_names_partition' as label, count(*) as row_count, 2 as expected_count - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} -union all - select 'nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA' as label, count(distinct currency) as row_count, 1 as expected_count - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA') }} --- union all --- select count(distinct id) as row_count, 3 as expected_count --- from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data') }} -union all - select 'some_stream_that_was_empty_scd' as label, count(*) as row_count, 0 as expected_count - from {{ ref('some_stream_that_was_empty_scd') }} -union all - select 'some_stream_that_was_empty' as label, count(*) as row_count, 0 as expected_count - from {{ ref('some_stream_that_was_empty') }} -union all - select 'arrays' as label, count(*) as row_count, 1 as expected_count - from {{ ref('arrays') }} -union all - select 'arrays_nested_array_parent' as label, count(*) as row_count, 1 as expected_count - from {{ ref('arrays_nested_array_parent') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp_incremental/nested_streams_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp_incremental/nested_streams_second_run_row_counts.sql deleted file mode 100644 index d2652ef2fd3bf..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp_incremental/nested_streams_second_run_row_counts.sql +++ /dev/null @@ -1,21 +0,0 @@ -with table_row_counts as ( - select distinct '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 3 as expected_count - from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -union all - select distinct 'nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 3 as expected_count - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names') }} -union all - select 'nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA' as label, count(distinct currency) as row_count, 1 as expected_count - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA') }} --- union all --- select count(distinct id) as row_count, 3 as expected_count --- from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data') }} -union all - select 'some_stream_that_was_empty_scd' as label, count(*) as row_count, 3 as expected_count - from {{ ref('some_stream_that_was_empty_scd') }} -union all - select 'some_stream_that_was_empty' as label, count(*) as row_count, 3 as expected_count - from {{ ref('some_stream_that_was_empty') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests/schema_test.yml deleted file mode 100644 index 2695a9e408bc2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests/schema_test.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 - -models: - - name: nested_stream_with_complex_columns_resulting_into_long_names_partition - tests: - - dbt_utils.expression_is_true: - expression: "double_array_data is not null" - - dbt_utils.expression_is_true: - expression: "DATA is not null" - - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA - columns: - - name: currency - tests: - - not_null -# - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data -# columns: -# - name: id -# tests: - # - not_null # TODO Fix bug here - - name: unnest_alias_children_owner - tests: - - dbt_utils.expression_is_true: - expression: "\"column`_'with\"\"_quotes\" is not null" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml deleted file mode 100644 index 2695a9e408bc2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml +++ /dev/null @@ -1,23 +0,0 @@ -version: 2 - -models: - - name: nested_stream_with_complex_columns_resulting_into_long_names_partition - tests: - - dbt_utils.expression_is_true: - expression: "double_array_data is not null" - - 
dbt_utils.expression_is_true: - expression: "DATA is not null" - - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA - columns: - - name: currency - tests: - - not_null -# - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data -# columns: -# - name: id -# tests: - # - not_null # TODO Fix bug here - - name: unnest_alias_children_owner - tests: - - dbt_utils.expression_is_true: - expression: "\"column`_'with\"\"_quotes\" is not null" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json deleted file mode 100644 index 37d6c7d9a939c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "stream_test_scd_drop", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "datetime_to_string": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" - }, - "string_to_dt": { - "type": "string" - }, - "number_to_int": { - "type": "number" - }, - "int_to_number": { - "type": "integer" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json deleted file mode 100644 index 04b78b4b435f6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "stream_test_scd_drop", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "datetime_to_string": { - "type": "string" - }, - "string_to_dt": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" - }, - "number_to_int": { - "type": "integer" - }, - "int_to_number": { - "type": "number" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json deleted file mode 100644 index 
9a76b76cda8b6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "stream_test_scd_drop", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "datetime_to_string": { - "type": "string" - }, - "string_to_dt": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" - }, - "number_to_int": { - "type": "integer" - }, - "int_to_number": { - "type": "number" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "overwrite", - "primary_key": [["id"]] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt deleted file mode 100644 index e35685cb629a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt +++ /dev/null @@ -1,5 +0,0 @@ -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": 1, "int_to_number": 10}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": 2, "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": 10, "int_to_number": 11}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": 3, "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": 11, "int_to_number": 12}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": 4, "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": 111, "int_to_number": 133}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 5, "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt deleted file mode 100644 index 492efbaea0aea..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt +++ /dev/null @@ -1,6 +0,0 @@ -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": 1, "int_to_number": 10}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": 2, "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": 10, "int_to_number": 11}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": 3, "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": 11, "int_to_number": 12}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": 4, "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": 111, "int_to_number": 133}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 5, "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 6, "date": "2022-09-03", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"this is a string, not a datetime value", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300.25}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/dbt_test_config/dbt_data_tests/test_check_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/dbt_test_config/dbt_data_tests/test_check_row_counts.sql deleted file mode 100644 index 5b8755db9ec63..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/dbt_test_config/dbt_data_tests/test_check_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('test_scd_drop_row_counts') }} -where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/README.md b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/README.md deleted file mode 100644 index 87e59f2f33e84..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# test_simple_streams - -## Exchange Rate - -This test suite focuses on testing a simple (non-nested) stream of data similar to `source-exchangerates`, using two different -`destination_sync_modes`: - -- `incremental` + `overwrite` with stream `exchange_rate` -- `incremental` + `append_dedup` with stream `dedup_exchange_rate` - -To do so,
we've set up two streams in the catalog.json and are using the exact same record messages data in both. - -Note that we are also making sure that one of the columns used as a primary key is of type `float`, as this could be -an edge case when using it as a partition key on certain destinations. - -## CDC - -We've also included some streams as if they were produced by a CDC source, specifically to test how they behave with dedup sync modes, where deleted rows should be removed from deduplicated tables. diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json deleted file mode 100644 index 584f7f98d3599..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json +++ /dev/null @@ -1,292 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "exchange_rate", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "currency": { - "type": "string" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "HKD@spéçiäl & characters": { - "type": "number" - }, - "HKD_special___characters": { - "type": "string" - }, - "NZD": { - "type": "number" - }, - "USD": { - "type": "number" - }, - "column`_'with\"_quotes": { - "type": "string" - }, - "datetime_tz": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" - }, - "datetime_no_tz": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_without_timezone" - }, - "time_tz": { - "type": "string", - "format": "time", - "airbyte_type": "time_with_timezone" - }, - "time_no_tz": { - "type": "string", - "format": "time", - "airbyte_type": "time_without_timezone" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "dedup_exchange_rate", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "currency": { - "type": "string" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "HKD@spéçiäl & characters": { - "type": "number" - }, - "HKD_special___characters": { - "type": "string" - }, - "NZD": { - "type": "number" - }, - "USD": { - "type": "number" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"], ["currency"], ["NZD"]] - }, - { - "stream": { - "name": "renamed_dedup_cdc_excluded", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "_ab_cdc_updated_at": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["_ab_cdc_updated_at"] - }, - "sync_mode": "incremental", - "cursor_field": ["_ab_cdc_updated_at"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "dedup_cdc_excluded", - "json_schema": { -
"type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "name": { - "type": ["string", "null"] - }, - "_ab_cdc_lsn": { - "type": ["null", "number"] - }, - "_ab_cdc_updated_at": { - "type": ["null", "number"] - }, - "_ab_cdc_deleted_at": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["_ab_cdc_lsn"] - }, - "sync_mode": "incremental", - "cursor_field": ["_ab_cdc_lsn"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "pos_dedup_cdcx", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "name": { - "type": ["string", "null"] - }, - "_ab_cdc_lsn": { - "type": ["null", "number"] - }, - "_ab_cdc_updated_at": { - "type": ["null", "number"] - }, - "_ab_cdc_deleted_at": { - "type": ["null", "number"] - }, - "_ab_cdc_log_pos": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["_ab_cdc_lsn"] - }, - "sync_mode": "full_refresh", - "cursor_field": ["_ab_cdc_lsn"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "1_prefix_startwith_number", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "date": { - "type": "string", - "format": "date" - }, - "text": { - "type": "string" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "multiple_column_names_conflicts", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "User Id": { - "type": ["string", "null"] - }, - "user_id": { - "type": ["null", "number"] - }, - "User id": { - "type": ["null", "number"] - }, - "user id": { - "type": ["null", "number"] - }, - "User@Id": { - "type": ["null", "string"] - }, - "UserId": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "cursor_field": [], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "types_testing", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "airbyte_integer_column": { - "type": "number", - "airbyte_type": "integer" - }, - "nullable_airbyte_integer_column": { - "type": ["null", "number"], - "airbyte_type": "integer" - } - } - } - }, - "sync_mode": "full_refresh", - "cursor_field": [], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json deleted file mode 100644 index 1f334071c928a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json +++ /dev/null @@ -1,156 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": 
"exchange_rate", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "number" - }, - "currency": { - "type": "string" - }, - "new_column": { - "type": "number" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "HKD@spéçiäl & characters": { - "type": "number" - }, - "NZD": { - "type": "number" - }, - "USD": { - "type": "number" - }, - "column`_'with\"_quotes": { - "type": "string" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "overwrite" - }, - { - "stream": { - "name": "dedup_exchange_rate", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "number" - }, - "currency": { - "type": "string" - }, - "new_column": { - "type": "number" - }, - "date": { - "type": "string", - "format": "date" - }, - "timestamp_col": { - "type": "string", - "format": "date-time" - }, - "HKD@spéçiäl & characters": { - "type": "number" - }, - "NZD": { - "type": "number" - }, - "USD": { - "type": "integer" - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["date"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"], ["currency"], ["NZD"]] - }, - { - "stream": { - "name": "renamed_dedup_cdc_excluded", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "name": { - "type": ["string", "null"] - }, - "_ab_cdc_lsn": { - "type": ["null", "number"] - }, - "_ab_cdc_updated_at": { - "type": ["null", "number"] - }, - "_ab_cdc_deleted_at": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["_ab_cdc_lsn"] - }, - "sync_mode": "incremental", - "cursor_field": ["_ab_cdc_lsn"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "dedup_cdc_excluded", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": "integer" - }, - "name": { - "type": ["string", "null"] - }, - "_ab_cdc_lsn": { - "type": ["null", "number"] - }, - "_ab_cdc_updated_at": { - "type": ["null", "number"] - }, - "_ab_cdc_deleted_at": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh", "incremental"], - "source_defined_cursor": true, - "default_cursor_field": ["_ab_cdc_lsn"] - }, - "sync_mode": "incremental", - "cursor_field": ["_ab_cdc_lsn"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt deleted file mode 100644 index a2ec40e1974cc..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt +++ /dev/null @@ -1,72 +0,0 @@ -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637589000, "data": { "id": 1, "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": 1.14, "HKD@spéçiäl & characters": 2.13, "HKD_special___characters": 
"column name collision?", "column`_'with\"_quotes":"ma\"z`d'a" }}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637689100, "data": { "id": 1, "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 7.15, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637789200, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": 3.89, "HKD@spéçiäl & characters": 7.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637889300, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": 1.14, "HKD@spéçiäl & characters": 7.99, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.99}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637989400, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 8, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990700, "data": { "id": 1, "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 1.14, "HKD@spéçiäl & characters": 10.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 7.02, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991100, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 8.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991200, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991300, "data": { "id": 
6, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "datetime_tz": "2022-01-14T01:04:04-04:00", "datetime_no_tz": "2022-01-14T01:04:04", "time_tz": "01:04:04-04:00", "time_no_tz": "01:04:04"}}} - -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637589000, "data": { "id": 1, "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": 1.14, "HKD@spéçiäl & characters": 2.13, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a" }}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637689100, "data": { "id": 1, "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 7.15, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637789200, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": 3.89, "HKD@spéçiäl & characters": 7.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637889300, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": 1.14, "HKD@spéçiäl & characters": 7.99, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.99}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637989400, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 8, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990700, "data": { "id": 1, "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 1.14, "HKD@spéçiäl & characters": 10.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 7.02, "HKD_special___characters": "column name collision?", 
"column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991100, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 8.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991200, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} - -// Note that some of the IDs are inserted and then deleted; this should be reflected as a single row in the SCD model with _airbyte_active_row set to 0. -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":4,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":6,"name":"opel","_ab_cdc_updated_at":1623850868109,"_ab_cdc_lsn":27009440,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":7,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -// messages_incremental.txt has a dedup_cdc_excluded record with emitted_at=1623860160, i.e. older than this record. If you delete/modify this record, make sure to maintain that relationship. -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":6,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} -// these messages have the same _ab_cdc_updated_at, but different _ab_cdc_lsn. They should each get an entry in the SCD model, and the final table should reflect the highest lsn's data. 
-{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"foo1","_ab_cdc_updated_at":1623850900000,"_ab_cdc_lsn":27010232,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -// for now - increment lsn by 100 because mysql/mssql/tidb round are truncating it at the hundreds digit -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"foo3","_ab_cdc_updated_at":1623850900000,"_ab_cdc_lsn":27010432,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"foo2","_ab_cdc_updated_at":1623850900000,"_ab_cdc_lsn":27010332,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} - -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_log_pos": 33278,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":3,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33271,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lotus","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33280,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lily","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33281,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} - -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2020-08-29", "text": "hi 1"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637689100, "data": { "id": 1, "date": "2020-08-30", "text": "hi 2"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637789200, "data": { "id": 2, "date": "2020-08-31", "text": "hi 1"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637889300, "data": { "id": 2, "date": "2020-08-31", "text": "hi 2"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637989400, "data": { "id": 2, "date": "2020-09-01", "text": "hi 3"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990700, "data": { "id": 1, "date": "2020-09-01", "text": "hi 3"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990800, "data": { "id": 2, "date": 
"2020-09-01", "text": "hi 4"}}} - -{"type":"RECORD","record":{"stream":"multiple_column_names_conflicts","data":{"id":1,"User Id":"chris","user_id":42,"User id":300,"user id": 102,"UserId":101},"emitted_at":1623959926}} - -// These records are verified in types_testing_incorrect_values.sql. If you add/remove entries, make sure to update that file as well. -// IMPORTANT: big_integer_column and nullable_big_integer_column were removed from catalog.json because of difficulties in implementing NUMERIC support. -// This is fine, because no major sources currently produce big_integer fields. -// After that functionality is completed, we should restore their entries to catalog.json. -// Verify max value for int64, and a 28-digit value for big_integer. (28 is larger than an int64 can handle, but still within bounds for a BigQuery NUMERIC column) -{"type":"RECORD","record":{"stream":"types_testing","data":{"id":1,"airbyte_integer_column":9223372036854775807,"nullable_airbyte_integer_column":9223372036854775807,"big_integer_column":"1234567890123456789012345678","nullable_big_integer_column":"1234567890123456789012345678"},"emitted_at":1623959926}} -// Verify max value for int64, and a negative 28-digit value for big_integer -{"type":"RECORD","record":{"stream":"types_testing","data":{"id":2,"airbyte_integer_column":-9223372036854775808,"nullable_airbyte_integer_column":-9223372036854775808,"big_integer_column":"-1234567890123456789012345678","nullable_big_integer_column":"-1234567890123456789012345678"},"emitted_at":1623959926}} -// Verify nullable values -{"type":"RECORD","record":{"stream":"types_testing","data":{"id":3,"airbyte_integer_column":0,"big_integer_column":0},"emitted_at":1623959926}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt deleted file mode 100644 index 1a703548c5b95..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt +++ /dev/null @@ -1,38 +0,0 @@ -// Some records are duplicated from messages.txt - this mimics our "at-least-once" delivery policy. - -// Other records "go back in time", i.e. are new data but have an older emitted_at timestamp than some of the those duplicated records. -// (I think?) This mimics an interruption to normalization, such that some records were normalized but others were not. - -// These first records are old data. -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -// These records are new data. 
-{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650000000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 14.05, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650010000, "data": { "id": 4, "currency": "HKD", "NZD": 1.19, "HKD@spéçiäl & characters": 0.01, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650011000, "data": { "id": 1, "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 9.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650012000, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 6.39, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} - -// These first records are old data. -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -// These records are new data. -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650000000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 14.05, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650010000, "data": { "id": 4, "currency": "HKD", "NZD": 1.19, "HKD@spéçiäl & characters": 0.01, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650011000, "data": { "id": 1, "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 9.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650012000, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 6.39, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} - -// All of these records are new data. 
-// This record has an _older_ emitted_at than the latest dedup_cdc_excluded record in messages.txt -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623900000000,"_ab_cdc_lsn":28010252,"_ab_cdc_deleted_at":1623900000000},"emitted_at":1623900000000}} -// Previously we had a bug where we only respected deletions from the most recent _airbyte_emitted_at. This message tests that ID 5 is still correctly deleted (i.e. marked with _airbyte_active_row = 0). -// This record is also deleted in messages_schema_change.txt. -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1624000000000,"_ab_cdc_lsn":29010252,"_ab_cdc_deleted_at":null},"emitted_at":1624000000000}} - -// All of these records are old data. -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_log_pos": 33278,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":3,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33271,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt deleted file mode 100644 index 4aeca6dbc2073..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt +++ /dev/null @@ -1,16 +0,0 @@ -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661281900, "data": { "id": 3.14, "currency": "EUR", "new_column": 2.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 2.12, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 7}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661291900, "data": { "id": 0.12, "currency": "GBP", "new_column": 3.81, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.14, "HKD@spéçiäl & characters": 3.01, 
"column`_'with\"_quotes":"ma\"z`d'a", "USD": 11}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661381900, "data": { "id": 4.22, "currency": "EUR", "new_column": 89.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.89, "HKD@spéçiäl & characters": 8.88, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661481900, "data": { "id": 1, "currency": "HKD", "new_column": 91.11, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 1.19, "HKD@spéçiäl & characters": 99.1, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} - -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661281900, "data": { "id": 3.14, "currency": "EUR", "new_column": 2.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 2.12, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 7}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661291900, "data": { "id": 0.12, "currency": "GBP", "new_column": 3.81, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.14, "HKD@spéçiäl & characters": 3.01, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 11}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661381900, "data": { "id": 4.22, "currency": "EUR", "new_column": 89.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.89, "HKD@spéçiäl & characters": 8.88, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661481900, "data": { "id": 1, "currency": "HKD", "new_column": 91.11, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 1.19, "HKD@spéçiäl & characters": 99.1, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} - -{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":8,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623949314663,"_ab_cdc_lsn":26985264,"_ab_cdc_deleted_at":null},"emitted_at":1623960160}} -{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":"opel","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868109,"_ab_cdc_lsn":28009440,"_ab_cdc_deleted_at":null},"emitted_at":1623961660}} -{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868371,"_ab_cdc_lsn":28010232,"_ab_cdc_deleted_at":1623950868371},"emitted_at":1623961660}} - -// This message tests the ability to delete a record which was inserted in a previous sync. See messages_incremental.txt for how it was inserted. 
-{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1625000000000,"_ab_cdc_lsn":29020252,"_ab_cdc_deleted_at":1625000000000},"emitted_at":1625000000000}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json deleted file mode 100644 index 4d65ce5e88e9d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "bigquery": [ - { "HKD_special___characters": "HKD_special___characters_1" }, - { "'\"HKD@spéçiäl & characters\"'": "HKD_special___characters" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "column___with__quotes is not null" - } - ], - "oracle": [ - { "HKD_special___characters": "HKD_special___characters_1" }, - { "'\"HKD@spéçiäl & characters\"'": "HKD_special___characters" }, - { "HKD@spéçiäl & characters": "hkd_special___characters" }, - { "\"hkd_special___characters\"": "hkd_special___characters" }, - { "- date": "- '\"DATE\"'" }, - { "_airbyte_raw_": "airbyte_raw_" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "column___with__quotes is not null" - } - ], - "postgres": [], - "snowflake": [ - { "HKD@SPÉÇIÄL & CHARACTERS": "HKD@spéçiäl & characters" }, - { - "SIMPLE_STREAMS_FIRST_RUN_ROW_COUNTS": "simple_streams_first_run_row_counts" - }, - { - "SIMPLE_STREAMS_SECOND_RUN_ROW_COUNTS": "simple_streams_second_run_row_counts" - }, - { - "TYPES_TESTING_INCORRECT_VALUES": "types_testing_incorrect_values" - }, - { - "DEDUP_CDC_EXCLUDED_FIRST_RUN_INCORRECT_NAMES": "dedup_cdc_excluded_first_run_incorrect_names" - }, - { - "DEDUP_CDC_EXCLUDED_SECOND_RUN_INCORRECT_NAMES": "dedup_cdc_excluded_second_run_incorrect_names" - }, - { - "DEDUP_CDC_EXCLUDED_THIRD_RUN_INCORRECT_NAMES": "dedup_cdc_excluded_third_run_incorrect_names" - } - ], - "redshift": [], - "mysql": [ - { "- HKD_special___characters": "- '\"HKD_special___characters\"'" }, - { "!= HKD_special___characters": "!= \"HKD_special___characters\"" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "`column__'with\\\"_quotes` is not null" - } - ], - "mssql": [ - { "- HKD_special___characters": "- '\"HKD_special___characters\"'" }, - { "!= HKD_special___characters": "!= \"HKD_special___characters\"" } - ], - "clickhouse": [ - { "'\"HKD@spéçiäl & characters\"'": "HKD_special___characters" }, - { - "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "column___with__quotes is not null" - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql deleted file mode 100644 index afbdc6ac5b303..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('simple_streams_first_run_row_counts') }} -where row_count != expected_count diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_dedup_cdc_excluded_first_run_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_dedup_cdc_excluded_first_run_names.sql deleted file mode 100644 index 2a24121d2c422..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_dedup_cdc_excluded_first_run_names.sql +++ /dev/null @@ -1 +0,0 @@ -select * from {{ ref('dedup_cdc_excluded_first_run_incorrect_names') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_types_testing_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_types_testing_values.sql deleted file mode 100644 index 41eff66fa3135..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_types_testing_values.sql +++ /dev/null @@ -1 +0,0 @@ -select * from {{ ref('types_testing_incorrect_values') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql deleted file mode 100644 index 99e98a10a781c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('simple_streams_second_run_row_counts') }} -where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_dedup_cdc_excluded_second_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_dedup_cdc_excluded_second_run_incorrect_names.sql deleted file mode 100644 index adf1a31fc5eed..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_dedup_cdc_excluded_second_run_incorrect_names.sql +++ /dev/null @@ -1 +0,0 @@ -select * from {{ ref('dedup_cdc_excluded_second_run_incorrect_names') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_check_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_check_third_run_row_counts.sql deleted file mode 100644 index 5979aa28cea48..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_check_third_run_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('simple_streams_third_run_row_counts') }} -where row_count != expected_count diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_dedup_cdc_excluded_third_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_dedup_cdc_excluded_third_run_incorrect_names.sql deleted file mode 100644 index b5d359fd6ac2d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_dedup_cdc_excluded_third_run_incorrect_names.sql +++ /dev/null @@ -1 +0,0 @@ -select * from {{ ref('dedup_cdc_excluded_third_run_incorrect_names') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/dedup_cdc_excluded_first_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/dedup_cdc_excluded_first_run_incorrect_names.sql deleted file mode 100644 index 22df0b07645b9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/dedup_cdc_excluded_first_run_incorrect_names.sql +++ /dev/null @@ -1,14 +0,0 @@ -select * from {{ ref('dedup_cdc_excluded') }} where -( - id = 1 and name != 'mazda' -) or ( - id = 2 and name != 'toyata' -) or ( - id = 5 and name != 'vw' -) or ( - id = 7 and name != 'lotus' -) or ( - id = 8 and name != 'foo3' -) or ( - id not in (1, 2, 5, 7, 8) -) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/simple_streams_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/simple_streams_first_run_row_counts.sql deleted file mode 100644 index 33cc2898bf2b2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/simple_streams_first_run_row_counts.sql +++ /dev/null @@ -1,46 +0,0 @@ -with table_row_counts as ( - select distinct '_airbyte_raw_exchange_rate' as label, count(*) as row_count, 13 as expected_count - from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -union all - select distinct 'exchange_rate' as label, count(*) as row_count, 13 as expected_count - from {{ ref('exchange_rate') }} - -union all - - select distinct '_airbyte_raw_dedup_exchange_rate' as label, count(*) as row_count, 12 as expected_count - from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -union all - select distinct 'dedup_exchange_rate_scd' as label, count(*) as row_count, 12 as expected_count - from {{ ref('dedup_exchange_rate_scd') }} -union all - select distinct 'dedup_exchange_rate' as label, count(*) as row_count, 6 as expected_count - from {{ ref('dedup_exchange_rate') }} - -union all - - select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 11 as expected_count - from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -union all - select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 11 as expected_count - from {{ ref('dedup_cdc_excluded_scd') }} -union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 5 as 
expected_count - from {{ ref('dedup_cdc_excluded') }} - -union all - - select distinct '_airbyte_raw_pos_dedup_cdcx' as label, count(*) as row_count, 8 as expected_count - from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -union all - select distinct 'pos_dedup_cdcx_scd' as label, count(*) as row_count, 8 as expected_count - from {{ ref('pos_dedup_cdcx_scd') }} -union all - select distinct 'pos_dedup_cdcx' as label, count(*) as row_count, 3 as expected_count - from {{ ref('pos_dedup_cdcx') }} - -union all - select distinct 'types_testing' as label, count(*) as row_count, 3 as expected_count - from {{ ref('types_testing') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/types_testing_incorrect_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/types_testing_incorrect_values.sql deleted file mode 100644 index 9a382eda267c8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/types_testing_incorrect_values.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Note that we cast the columns to string to avoid any weird numeric equality nonsense. --- For example, in Postgres, this query returns `true`, even though the two numbers are different (9223372036854775807 is the max value of a signed 64-bit int): --- select (9223372036854775807 :: double precision) = (9223372036854775806 :: double precision) --- A double has only about 15 significant decimal digits of precision, so both values are rounded off to 9.223372036854776e+18 - -select * from {{ ref('types_testing') }} where -( - id = 1 and ( - cast(airbyte_integer_column as {{ dbt_utils.type_string() }}) != '9223372036854775807' - or cast(nullable_airbyte_integer_column as {{ dbt_utils.type_string() }}) != '9223372036854775807' - {# - or cast(big_integer_column as {{ dbt_utils.type_string() }}) != '1234567890123456789012345678' - or cast(nullable_big_integer_column as {{ dbt_utils.type_string() }}) != '1234567890123456789012345678' - #} - ) -) or ( - id = 2 and ( - cast(airbyte_integer_column as {{ dbt_utils.type_string() }}) != '-9223372036854775808' - or cast(nullable_airbyte_integer_column as {{ dbt_utils.type_string() }}) != '-9223372036854775808' - {# - or cast(big_integer_column as {{ dbt_utils.type_string() }}) != '-1234567890123456789012345678' - or cast(nullable_big_integer_column as {{ dbt_utils.type_string() }}) != '-1234567890123456789012345678' - #} - ) -) or ( - id = 3 and ( - cast(airbyte_integer_column as {{ dbt_utils.type_string() }}) != '0' - or nullable_airbyte_integer_column is not null - {# - or cast(big_integer_column as {{ dbt_utils.type_string() }}) != '0' - or nullable_big_integer_column is not null - #} - ) -)
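To make the rounding hazard in the comment above concrete, here is a minimal Python check (a hedged sketch: it assumes only that Python floats are IEEE 754 doubles, the same representation the Postgres cast above produces; both integers come straight from the comment):

    # Both 64-bit integers collapse to the same double, so a numeric
    # comparison of the casted values would wrongly report them as equal.
    a = 9223372036854775807  # max value of a signed 64-bit int
    b = 9223372036854775806
    print(float(a) == float(b))  # True: both round to 9.223372036854776e+18
    print(str(a) == str(b))      # False: string comparison keeps full precision

This is exactly why the data test above compares the columns as strings rather than as numbers.

diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/dedup_cdc_excluded_second_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/dedup_cdc_excluded_second_run_incorrect_names.sql deleted file mode 100644 index 6bff1b073fd1e..0000000000000 ---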
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/dedup_cdc_excluded_second_run_incorrect_names.sql +++ /dev/null @@ -1,13 +0,0 @@ -select * from {{ ref('dedup_cdc_excluded') }} where -( - id = 1 and name != 'mazda' -) or ( - id = 2 and name != 'toyata' -) or ( - id = 7 and name != 'lotus' -) or ( - id = 8 and name != 'ford' -) or ( - id not in (1, 2, 7, 8) -) - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql deleted file mode 100644 index 405337845bea7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql +++ /dev/null @@ -1,42 +0,0 @@ -with table_row_counts as ( - select distinct '_airbyte_raw_exchange_rate' as label, count(*) as row_count, 6 as expected_count - from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -union all - select distinct 'exchange_rate' as label, count(*) as row_count, 6 as expected_count - from {{ ref('exchange_rate') }} - -union all - - select distinct '_airbyte_raw_dedup_exchange_rate' as label, count(*) as row_count, 6 as expected_count - from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -union all - select distinct 'dedup_exchange_rate_scd' as label, count(*) as row_count, 16 as expected_count - from {{ ref('dedup_exchange_rate_scd') }} -union all - select distinct 'dedup_exchange_rate' as label, count(*) as row_count, 7 as expected_count - from {{ ref('dedup_exchange_rate') }} - -union all - - select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count - from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -union all - select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 13 as expected_count - from {{ ref('dedup_cdc_excluded_scd') }} -union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count - from {{ ref('dedup_cdc_excluded') }} - -union all - - select distinct '_airbyte_raw_pos_dedup_cdcx' as label, count(*) as row_count, 6 as expected_count - from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -union all - select distinct 'pos_dedup_cdcx_scd' as label, count(*) as row_count, 8 as expected_count - from {{ ref('pos_dedup_cdcx_scd') }} -union all - select distinct 'pos_dedup_cdcx' as label, count(*) as row_count, 3 as expected_count - from {{ ref('pos_dedup_cdcx') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/dedup_cdc_excluded_third_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/dedup_cdc_excluded_third_run_incorrect_names.sql deleted file mode 100644 index 859af36e7299e..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/dedup_cdc_excluded_third_run_incorrect_names.sql +++ /dev/null @@ -1,11 +0,0 @@ -select * from {{ ref('dedup_cdc_excluded') }} where -( - id = 1 and name != 'mazda' -) or ( - id = 2 and name != 'toyata' -) or ( - id = 7 and name != 'lotus' -) or ( - id not in (1, 2, 7) -) - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql deleted file mode 100644 index 775a055ae6dfa..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql +++ /dev/null @@ -1,31 +0,0 @@ -with table_row_counts as ( - select distinct '_airbyte_raw_exchange_rate' as label, count(*) as row_count, 4 as expected_count - from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -union all - select distinct 'exchange_rate' as label, count(*) as row_count, 4 as expected_count - from {{ ref('exchange_rate') }} - -union all - - select distinct '_airbyte_raw_dedup_exchange_rate' as label, count(*) as row_count, 10 as expected_count - from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -union all - select distinct 'dedup_exchange_rate_scd' as label, count(*) as row_count, 20 as expected_count - from {{ ref('dedup_exchange_rate_scd') }} -union all - select distinct 'dedup_exchange_rate' as label, count(*) as row_count, 11 as expected_count - from {{ ref('dedup_exchange_rate') }} - -union all - - select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count - from test_normalization._airbyte_raw_dedup_cdc_excluded -union all - select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 14 as expected_count - from test_normalization.dedup_cdc_excluded_scd -union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count - from test_normalization.dedup_cdc_excluded -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests/schema_test.yml deleted file mode 100644 index fe6b3ef9a85a9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests/schema_test.yml +++ /dev/null @@ -1,44 +0,0 @@ -version: 2 - -models: - - name: exchange_rate - tests: - - dbt_utils.expression_is_true: - # description: check no column collisions - # Two columns having similar names, especially after removing special characters, should remain distinct - expression: cast("HKD@spéçiäl & characters" as {{ dbt_utils.type_string() }}) != HKD_special___characters - - dbt_utils.expression_is_true: - expression: "\"column`_'with\"\"_quotes\" is not null" - columns: - - name: '"HKD@spéçiäl & characters"' - # description: check special characters - # Use special characters in column names and make 
sure they are correctly parsed in the JSON blob and populated - tests: - - not_null - - - name: dedup_exchange_rate - tests: - - dbt_utils.unique_combination_of_columns: - # description: check_deduplication_by_primary_key - # The final table for this stream should have unique composite primary key values. - combination_of_columns: - - id - - currency - - NZD - - - name: dedup_cdc_excluded -# TODO: create/fix GitHub issue in dbt-core/adapters repository to handle schema changes (outside airbyte's control) -# Disabling because incremental dbt is not handling quotes well atm (dbt 0.21.0) -# tests: -# - dbt_utils.expression_is_true: -# expression: "\"column`_'with\"\"_quotes\" is not null" - columns: - - name: name - tests: - - not_null - - - name: pos_dedup_cdcx - columns: - - name: name - tests: - - not_null diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml deleted file mode 100644 index fe6b3ef9a85a9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml +++ /dev/null @@ -1,44 +0,0 @@ -version: 2 - -models: - - name: exchange_rate - tests: - - dbt_utils.expression_is_true: - # description: check no column collisions - # Two columns having similar names, especially after removing special characters, should remain distinct - expression: cast("HKD@spéçiäl & characters" as {{ dbt_utils.type_string() }}) != HKD_special___characters - - dbt_utils.expression_is_true: - expression: "\"column`_'with\"\"_quotes\" is not null" - columns: - - name: '"HKD@spéçiäl & characters"' - # description: check special characters - # Use special characters in column names and make sure they are correctly parsed in the JSON blob and populated - tests: - - not_null - - - name: dedup_exchange_rate - tests: - - dbt_utils.unique_combination_of_columns: - # description: check_deduplication_by_primary_key - # The final table for this stream should have unique composite primary key values. 
- combination_of_columns: - - id - - currency - - NZD - - - name: dedup_cdc_excluded -# TODO: create/fix GitHub issue in dbt-core/adapters repository to handle schema changes (outside airbyte's control) -# Disabling because incremental dbt is not handling quotes well atm (dbt 0.21.0) -# tests: -# - dbt_utils.expression_is_true: -# expression: "\"column`_'with\"\"_quotes\" is not null" - columns: - - name: name - tests: - - not_null - - - name: pos_dedup_cdcx - columns: - - name: name - tests: - - not_null diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_schema_change/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_schema_change/schema_test.yml deleted file mode 100644 index 485af162c4fb6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_schema_change/schema_test.yml +++ /dev/null @@ -1,34 +0,0 @@ -version: 2 - -models: - - name: exchange_rate - tests: - - dbt_utils.expression_is_true: - expression: "\"column`_'with\"\"_quotes\" is not null" - columns: - - name: '"HKD@spéçiäl & characters"' - # description: check special characters - # Use special characters in column names and make sure they are correctly parsed in the JSON blob and populated - tests: - - not_null - - - name: dedup_exchange_rate - tests: - - dbt_utils.unique_combination_of_columns: - # description: check_deduplication_by_primary_key - # The final table for this stream should have unique composite primary key values. - combination_of_columns: - - id - - currency - - NZD - - - name: renamed_dedup_cdc_excluded -# TODO: create/fix GitHub issue in dbt-core/adapters repository to handle schema changes (outside airbyte's control) -# Disabling because incremental dbt is not handling quotes well atm (dbt 0.21.0) -# tests: -# - dbt_utils.expression_is_true: -# expression: "\"column`_'with\"\"_quotes\" is not null" - columns: - - name: name - tests: - - not_null diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/catalog.json deleted file mode 100644 index 7f9ff3d2901c4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/catalog.json +++ /dev/null @@ -1,114 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "sparse_nested_stream", - "json_schema": { - "type": "object", - "properties": { - "id": { - "type": "integer" - }, - "updated_at": { - "type": "integer" - }, - "obj_nest1": { - "type": "object", - "properties": { - "obj_nest2": { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - } - } - } - }, - "arr_nest1": { - "type": "array", - "items": { - "type": "object", - "properties": { - "arr_nest2": { - "type": "array", - "items": { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - } - } - } - } - } - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["updated_at"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - }, - { - "stream": { - "name": "sparse_nested_stream_empty", - "json_schema": 
{ - "type": "object", - "properties": { - "id": { - "type": "integer" - }, - "updated_at": { - "type": "integer" - }, - "obj_nest1": { - "type": "object", - "properties": { - "obj_nest2": { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - } - } - } - }, - "arr_nest1": { - "type": "array", - "items": { - "type": "object", - "properties": { - "arr_nest2": { - "type": "array", - "items": { - "type": "object", - "properties": { - "foo": { - "type": "string" - } - } - } - } - } - } - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": ["updated_at"], - "destination_sync_mode": "append_dedup", - "primary_key": [["id"]] - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages.txt deleted file mode 100644 index de70d49e4b089..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages.txt +++ /dev/null @@ -1,6 +0,0 @@ -// This message is to verify that we don't encounter absurd duplication in deeply-nested fields with sparse data. -// We'll emit a message in the first sync with nested fields, but future syncs won't have the nested fields. -{"type": "RECORD", "record": {"stream": "sparse_nested_stream", "data": {"id": 1, "updated_at": 100, "obj_nest1": {"obj_nest2": {"foo": "bar"}}, "arr_nest1": [{"arr_nest2": [{"foo": "bar1"}, {"foo": "bar2"}]}, {"arr_nest2": [{"foo": "baz1"}, {"foo": "baz2"}]}]}, "emitted_at": 1672567200}} - -// This message is to verify our behavior in the case where the stream doesn't see any new data after the first sync. 
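For intuition about the nested record emitted above for sparse_nested_stream, here is a hedged Python sketch (the unnesting below is illustrative only, not the actual normalization code) of how each nested object and array expands into its own child table; the resulting lengths line up with the sync1 row-count expectations that appear later in this diff (1, 1, 2, and 4 rows):

    # Illustrative unnesting: one child-table row per nested element.
    record = {
        "id": 1,
        "obj_nest1": {"obj_nest2": {"foo": "bar"}},
        "arr_nest1": [
            {"arr_nest2": [{"foo": "bar1"}, {"foo": "bar2"}]},
            {"arr_nest2": [{"foo": "baz1"}, {"foo": "baz2"}]},
        ],
    }
    obj_nest1 = [record["obj_nest1"]]                           # 1 row
    obj_nest2 = [o["obj_nest2"] for o in obj_nest1]             # 1 row
    arr_nest1 = record["arr_nest1"]                             # 2 rows
    arr_nest2 = [x for a in arr_nest1 for x in a["arr_nest2"]]  # 4 rows
    print(len(obj_nest1), len(obj_nest2), len(arr_nest1), len(arr_nest2))  # 1 1 2 4

The duplication concern in the comments is that later sparse syncs, which carry none of these nested fields, must not multiply these child rows.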
-{"type": "RECORD", "record": {"stream": "sparse_nested_stream_empty", "data": {"id": 1, "updated_at": 100, "obj_nest1": {"obj_nest2": {"foo": "bar"}}, "arr_nest1": [{"arr_nest2": [{"foo": "bar1"}, {"foo": "bar2"}]}, {"arr_nest2": [{"foo": "baz1"}, {"foo": "baz2"}]}]}, "emitted_at": 1672567200}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages2.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages2.txt deleted file mode 100644 index 6f2ee29261c17..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages2.txt +++ /dev/null @@ -1 +0,0 @@ -{"type": "RECORD", "record": {"stream": "sparse_nested_stream", "data": {"id": 2, "updated_at": 101}, "emitted_at": 1672568200}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages3.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages3.txt deleted file mode 100644 index 7153c09b864ba..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages3.txt +++ /dev/null @@ -1 +0,0 @@ -{"type": "RECORD", "record": {"stream": "sparse_nested_stream", "data": {"id": 3, "updated_at": 102}, "emitted_at": 1672569200}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/replace_identifiers.json deleted file mode 100644 index 18d5eb0f40dfd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/replace_identifiers.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "bigquery": [ - { - "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream_empty_obj_nest1_obj_nest2" - }, - { - "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream_empty_arr_nest1_arr_nest2" - } - ], - "oracle": [], - "postgres": [], - "snowflake": [ - { - "sparse_nested_stream__y_obj_nest1_obj_nest2": "SPARSE_NESTED_STREAM_EMPTY_OBJ_NEST1_OBJ_NEST2" - }, - { - "sparse_nested_stream__y_arr_nest1_arr_nest2": "SPARSE_NESTED_STREAM_EMPTY_ARR_NEST1_ARR_NEST2" - }, - { - "SYNC1_ROW_COUNTS": "sync1_row_counts" - }, - { - "SYNC2_ROW_COUNTS": "sync2_row_counts" - }, - { - "SYNC3_ROW_COUNTS": "sync3_row_counts" - } - ], - "redshift": [ - { - "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream_empty_obj_nest1_obj_nest2" - }, - { - "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream_empty_arr_nest1_arr_nest2" - } - ], - "mysql": [], - "mssql": [ - { - "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream___y_obj_nest1_obj_nest2" - }, - { - "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream___y_arr_nest1_arr_nest2" - } - ], - "tidb": [ - { - "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream___y_obj_nest1_obj_nest2" - }, - { - "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream___y_arr_nest1_arr_nest2" - } - ] -} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_assertions/test_sync1_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_assertions/test_sync1_row_counts.sql deleted file mode 100644 index 6ed78bb5f3973..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_assertions/test_sync1_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('sync1_row_counts') }} -where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_expectations/sync1_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_expectations/sync1_row_counts.sql deleted file mode 100644 index f087c1d2f9e91..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_expectations/sync1_row_counts.sql +++ /dev/null @@ -1,33 +0,0 @@ -with table_row_counts as ( - select distinct 'sparse_nested_stream' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream') }} -union all - select distinct 'sparse_nested_stream_obj_nest1' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_obj_nest1') }} -union all - select distinct 'sparse_nested_stream_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_obj_nest1_obj_nest2') }} -union all - select distinct 'sparse_nested_stream_arr_nest1' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream_arr_nest1') }} -union all - select distinct 'sparse_nested_stream_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count - from {{ ref('sparse_nested_stream_arr_nest1_arr_nest2') }} -union all - select distinct 'sparse_nested_stream_empty' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_empty') }} -union all - select distinct 'sparse_nested_stream_empty_obj_nest1' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_empty_obj_nest1') }} -union all - select distinct 'sparse_nested_stream__y_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream__y_obj_nest1_obj_nest2') }} -union all - select distinct 'sparse_nested_stream_empty_arr_nest1' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream_empty_arr_nest1') }} -union all - select distinct 'sparse_nested_stream__y_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count - from {{ ref('sparse_nested_stream__y_arr_nest1_arr_nest2') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_assertions/test_sync2_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_assertions/test_sync2_row_counts.sql deleted file mode 100644 index 78d233e8a90b1..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_assertions/test_sync2_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('sync2_row_counts') }} -where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_expectations/sync2_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_expectations/sync2_row_counts.sql deleted file mode 100644 index 557e969c20e24..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_expectations/sync2_row_counts.sql +++ /dev/null @@ -1,34 +0,0 @@ -with table_row_counts as ( - -- This is the only difference with sync1 - there's one new record in sparse_nested_stream. - select distinct 'sparse_nested_stream' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream') }} -union all - select distinct 'sparse_nested_stream_obj_nest1' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_obj_nest1') }} -union all - select distinct 'sparse_nested_stream_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_obj_nest1_obj_nest2') }} -union all - select distinct 'sparse_nested_stream_arr_nest1' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream_arr_nest1') }} -union all - select distinct 'sparse_nested_stream_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count - from {{ ref('sparse_nested_stream_arr_nest1_arr_nest2') }} -union all - select distinct 'sparse_nested_stream_empty' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_empty') }} -union all - select distinct 'sparse_nested_stream_empty_obj_nest1' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_empty_obj_nest1') }} -union all - select distinct 'sparse_nested_stream__y_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream__y_obj_nest1_obj_nest2') }} -union all - select distinct 'sparse_nested_stream_empty_arr_nest1' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream_empty_arr_nest1') }} -union all - select distinct 'sparse_nested_stream__y_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count - from {{ ref('sparse_nested_stream__y_arr_nest1_arr_nest2') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_assertions/test_sync3_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_assertions/test_sync3_row_counts.sql deleted file mode 100644 index 764aa7b804533..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_assertions/test_sync3_row_counts.sql +++ /dev/null @@ -1,2 +0,0 @@ -select * from {{ ref('sync3_row_counts') }} -where row_count != expected_count diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_expectations/sync3_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_expectations/sync3_row_counts.sql deleted file mode 100644 index eb078d0b1570a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_expectations/sync3_row_counts.sql +++ /dev/null @@ -1,34 +0,0 @@ -with table_row_counts as ( - -- Again, this is the only difference with sync2 - one new record in sparse_nested_stream. - select distinct 'sparse_nested_stream' as label, count(*) as row_count, 3 as expected_count - from {{ ref('sparse_nested_stream') }} -union all - select distinct 'sparse_nested_stream_obj_nest1' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_obj_nest1') }} -union all - select distinct 'sparse_nested_stream_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_obj_nest1_obj_nest2') }} -union all - select distinct 'sparse_nested_stream_arr_nest1' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream_arr_nest1') }} -union all - select distinct 'sparse_nested_stream_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count - from {{ ref('sparse_nested_stream_arr_nest1_arr_nest2') }} -union all - select distinct 'sparse_nested_stream_empty' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_empty') }} -union all - select distinct 'sparse_nested_stream_empty_obj_nest1' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream_empty_obj_nest1') }} -union all - select distinct 'sparse_nested_stream__y_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count - from {{ ref('sparse_nested_stream__y_obj_nest1_obj_nest2') }} -union all - select distinct 'sparse_nested_stream_empty_arr_nest1' as label, count(*) as row_count, 2 as expected_count - from {{ ref('sparse_nested_stream_empty_arr_nest1') }} -union all - select distinct 'sparse_nested_stream__y_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count - from {{ ref('sparse_nested_stream__y_arr_nest1_arr_nest2') }} -) -select * -from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_drop_scd_overwrite.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_drop_scd_overwrite.py deleted file mode 100644 index f5f177a1499d9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_drop_scd_overwrite.py +++ /dev/null @@ -1,161 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - -import json -import os -import pathlib -import shutil - -import pytest -from integration_tests.dbt_integration_test import DbtIntegrationTest -from integration_tests.utils import generate_dbt_models, run_destination_process, setup_test_dir -from normalization import DestinationType - -temporary_folders = set() -dbt_test_utils = DbtIntegrationTest() - - -@pytest.fixture(scope="module", autouse=True) -def before_all_tests(request): - destinations_to_test = dbt_test_utils.get_test_targets() - # set clean-up args to clean target destination after the test - clean_up_args = { - "destination_type": [d for d in DestinationType if d.value in destinations_to_test], - "test_type": "test_reset_scd_overwrite", - "tmp_folders": temporary_folders, - } - dbt_test_utils.set_target_schema("test_reset_scd_overwrite") - dbt_test_utils.change_current_test_dir(request) - dbt_test_utils.setup_db(destinations_to_test) - os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"] - yield - dbt_test_utils.clean_tmp_tables(**clean_up_args) - dbt_test_utils.tear_down_db() - for folder in temporary_folders: - print(f"Deleting temporary test folder {folder}") - shutil.rmtree(folder, ignore_errors=True) - - -@pytest.fixture -def setup_test_path(request): - dbt_test_utils.change_current_test_dir(request) - print(f"Running from: {pathlib.Path().absolute()}") - print(f"Current PATH is: {os.environ['PATH']}") - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_reset_scd_on_overwrite(destination_type: DestinationType, setup_test_path): - if destination_type.value not in dbt_test_utils.get_test_targets(): - pytest.skip(f"Destination {destination_type} is not in the NORMALIZATION_TEST_TARGET env variable") - - if destination_type.value in [DestinationType.ORACLE.value, DestinationType.TIDB.value]: - # Oracle and TiDB do not support incremental syncs with schema changes yet - pytest.skip(f"{destination_type} does not support incremental sync with schema change yet") - elif destination_type.value == DestinationType.REDSHIFT.value: - # set unique schema for Redshift test - dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_reset_scd_")) - - test_resource_name = "test_reset_scd_overwrite" - # Select target schema - target_schema = dbt_test_utils.target_schema - - try: - print(f"Testing resetting SCD tables on overwrite with {destination_type} in schema {target_schema}") - run_reset_scd_on_overwrite_test(destination_type, test_resource_name) - finally: - dbt_test_utils.set_target_schema(target_schema) - - -def run_reset_scd_on_overwrite_test(destination_type: DestinationType, test_resource_name: str): - # Generate DBT profile yaml - integration_type = destination_type.value - test_root_dir = setup_test_dir(integration_type, temporary_folders) - destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir) - test_directory = os.path.join(test_root_dir, "models/generated") - shutil.rmtree(test_directory, ignore_errors=True) - - # Generate config file for the destination - config_file = os.path.join(test_root_dir, "destination_config.json") - with open(config_file, "w") as f: - f.write(json.dumps(destination_config)) - - # make sure DBT dependencies are installed - dbt_test_utils.dbt_check(destination_type, test_root_dir) - - # Generate catalog for an initial reset/cleanup (pre-test) - original_catalog_file = os.path.join("resources", test_resource_name, 
"data_input", "test_drop_scd_catalog.json") - dbt_test_utils.copy_replace( - original_catalog_file, - os.path.join(test_root_dir, "initial_reset_catalog.json"), - pattern='"destination_sync_mode": ".*"', - replace_value='"destination_sync_mode": "overwrite"', - ) - - # Force a reset in destination raw tables to remove any data left over from previous test runs - assert run_destination_process(destination_type, test_root_dir, "", "initial_reset_catalog.json", dbt_test_utils) - # generate models from catalog - generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog_reset.json", dbt_test_utils) - - # Run dbt process to normalize data from the first sync - dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) - - # Remove models generated in previous step to avoid DBT compilation errors - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_incremental") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_views") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_ctes") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_tables") - shutil.rmtree(test_directory, ignore_errors=True) - - # Run the first sync to create raw tables in destinations - dbt_test_utils.copy_replace(original_catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) - message_file = os.path.join("resources", test_resource_name, "data_input", "test_drop_scd_messages.txt") - assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - - # generate models from catalog - generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog.json", dbt_test_utils) - - # Run dbt process to normalize data from the first sync - dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) - - # Remove models generated in previous step to avoid DBT compilation errors - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_incremental") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_views") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_ctes") - shutil.rmtree(test_directory, ignore_errors=True) - - # Generate a catalog with modified schema for a reset - reset_catalog_file = os.path.join("resources", test_resource_name, "data_input", "test_drop_scd_catalog_reset.json") - dbt_test_utils.copy_replace(reset_catalog_file, os.path.join(test_root_dir, "reset_catalog.json")) - - # Run a reset - assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) - - # Run dbt process after reset to drop SCD table - generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog_reset.json", dbt_test_utils) - dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) - - # Remove models generated in previous step to avoid DBT compilation errors - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_incremental") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, 
"models/generated/airbyte_views") - shutil.rmtree(test_directory, ignore_errors=True) - test_directory = os.path.join(test_root_dir, "models/generated/airbyte_ctes") - shutil.rmtree(test_directory, ignore_errors=True) - - # Run another sync with modified catalog - modified_catalog_file = os.path.join("resources", test_resource_name, "data_input", "test_drop_scd_catalog_incremental.json") - dbt_test_utils.copy_replace(modified_catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) - message_file = os.path.join("resources", test_resource_name, "data_input", "test_scd_reset_messages_incremental.txt") - assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - - # Run dbt process - generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog_reset.json", dbt_test_utils) - dbt_test_utils.dbt_run(destination_type, test_root_dir) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py deleted file mode 100644 index 8a530db76d910..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py +++ /dev/null @@ -1,208 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -import json -import os -import pathlib -import re -import shutil -from typing import Any, Dict - -import pytest -from integration_tests.dbt_integration_test import DbtIntegrationTest -from integration_tests.utils import setup_test_dir -from normalization.destination_type import DestinationType -from normalization.transform_catalog import TransformCatalog - -temporary_folders = set() -dbt_test_utils = DbtIntegrationTest() - - -@pytest.fixture(scope="module", autouse=True) -def before_all_tests(request): - destinations_to_test = dbt_test_utils.get_test_targets() - # set clean-up args to clean target destination after the test - clean_up_args = { - "destination_type": [d for d in DestinationType if d.value in destinations_to_test], - "test_type": "ephemeral", - "tmp_folders": temporary_folders, - } - - dbt_test_utils.set_target_schema("test_ephemeral") - dbt_test_utils.change_current_test_dir(request) - dbt_test_utils.setup_db(destinations_to_test) - os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"] - yield - dbt_test_utils.clean_tmp_tables(**clean_up_args) - dbt_test_utils.tear_down_db() - for folder in temporary_folders: - print(f"Deleting temporary test folder {folder}") - shutil.rmtree(folder, ignore_errors=True) - - -@pytest.fixture -def setup_test_path(request): - dbt_test_utils.change_current_test_dir(request) - print(f"Running from: {pathlib.Path().absolute()}") - print(f"Current PATH is: {os.environ['PATH']}") - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize("column_count", [1000]) -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_destination_supported_limits(destination_type: DestinationType, column_count: int): - if destination_type.value == DestinationType.MYSQL.value: - # In MySQL, the max number of columns is limited by row size (8KB), - # not by absolute column count. It is way fewer than 1000. 
- pytest.skip("Skipping test for column limit, because in MySQL, the max number of columns is limited by row size (8KB)") - if destination_type.value == DestinationType.ORACLE.value: - # Airbyte uses a few columns for metadata and Oracle limits are right at 1000 - column_count = 993 - if destination_type.value == DestinationType.MSSQL.value: - column_count = 999 - run_test(destination_type, column_count) - - -@pytest.mark.parametrize( - "integration_type, column_count, expected_exception_message", - [ - ("Postgres", 1665, "target lists can have at most 1664 entries"), - ("BigQuery", 3000, "The view is too large."), - ("Snowflake", 2000, "Operation failed because soft limit on objects of type 'Column' per table was exceeded."), - ("Redshift", 1665, "target lists can have at most 1664 entries"), - ("MySQL", 250, "Row size too large"), - ("Oracle", 1001, "ORA-01792: maximum number of columns in a table or view is 1000"), - ("MSSQL", 1025, "exceeds the maximum of 1024 columns."), - ], -) -def test_destination_failure_over_limits(integration_type: str, column_count: int, expected_exception_message: str, setup_test_path): - destination_type = DestinationType.from_string(integration_type) - if destination_type.value not in dbt_test_utils.get_test_targets(): - pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") - run_test(destination_type, column_count, expected_exception_message) - - -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_empty_streams(destination_type: DestinationType, setup_test_path): - run_test(destination_type, 0) - - -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_stream_with_1_airbyte_column(destination_type: DestinationType, setup_test_path): - run_test(destination_type, 1) - - -def run_test(destination_type: DestinationType, column_count: int, expected_exception_message: str = ""): - if destination_type.value not in dbt_test_utils.get_test_targets(): - pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") - - if destination_type.value == DestinationType.CLICKHOUSE.value: - pytest.skip("ephemeral materialization isn't supported in ClickHouse yet") - if destination_type.value == DestinationType.ORACLE.value: - # Oracle does not allow changing to random schema - dbt_test_utils.set_target_schema("test_normalization") - elif destination_type.value == DestinationType.REDSHIFT.value: - # set unique schema for Redshift test - dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_ephemeral_")) - else: - dbt_test_utils.set_target_schema("test_ephemeral") - print(f"Testing ephemeral for destination {destination_type.value} with column count {column_count}") - integration_type = destination_type.value - # Create the test folder with dbt project and appropriate destination settings to run integration tests from - test_root_dir = setup_test_dir(integration_type, temporary_folders) - destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir) - # generate a catalog and associated dbt models files - generate_dbt_models(destination_type, test_root_dir, column_count) - # Use destination connector to create empty _airbyte_raw_* tables to use as input for the test - assert setup_input_raw_data(integration_type, test_root_dir, destination_config) - dbt_test_utils.dbt_check(destination_type, test_root_dir) - if expected_exception_message: - with 
pytest.raises(AssertionError): - dbt_test_utils.dbt_run(destination_type, test_root_dir) - assert search_logs_for_pattern(test_root_dir + "/dbt_output.log", expected_exception_message) - else: - dbt_test_utils.dbt_run(destination_type, test_root_dir) - - -def search_logs_for_pattern(log_file: str, pattern: str): - with open(log_file, "r") as file: - for line in file: - if re.search(pattern, line): - return True - return False - - -def setup_input_raw_data(integration_type: str, test_root_dir: str, destination_config: Dict[str, Any]) -> bool: - """ - This populates the associated "raw" tables that normalization reads from when running the dbt CLI. - """ - config_file = os.path.join(test_root_dir, "destination_config.json") - with open(config_file, "w") as f: - f.write(json.dumps(destination_config)) - commands = [ - "docker", - "run", - "--rm", - "--init", - "-v", - f"{test_root_dir}:/data", - "--network", - "host", - "-i", - f"airbyte/destination-{integration_type.lower()}:dev", - "write", - "--config", - "/data/destination_config.json", - "--catalog", - "/data/catalog.json", - ] - # Force a reset in destination raw tables - return dbt_test_utils.run_destination_process("", test_root_dir, commands) - - -def generate_dbt_models(destination_type: DestinationType, test_root_dir: str, column_count: int): - """ - This is the normalization step that generates dbt model files from the destination_catalog.json taken as input. - """ - output_directory = os.path.join(test_root_dir, "models", "generated") - shutil.rmtree(output_directory, ignore_errors=True) - catalog_config = { - "streams": [ - { - "stream": { - "name": dbt_test_utils.generate_random_string(f"stream_with_{column_count}_columns"), - "json_schema": { - "type": ["null", "object"], - "properties": {}, - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": True, - "default_cursor_field": [], - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "overwrite", - } - ] - } - if column_count == 1: - catalog_config["streams"][0]["stream"]["json_schema"]["properties"]["_airbyte_id"] = {"type": "integer"} - else: - for column in [dbt_test_utils.random_string(5) for _ in range(column_count)]: - catalog_config["streams"][0]["stream"]["json_schema"]["properties"][column] = {"type": "string"} - catalog = os.path.join(test_root_dir, "catalog.json") - with open(catalog, "w") as fh: - fh.write(json.dumps(catalog_config)) - - transform_catalog = TransformCatalog() - transform_catalog.config = { - "integration_type": destination_type.value, - "schema": dbt_test_utils.target_schema, - "catalog": [catalog], - "output_path": output_directory, - "json_column": "_airbyte_data", - "profile_config_dir": test_root_dir, - } - transform_catalog.process_catalog() diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py deleted file mode 100644 index f1cea41e1a4f5..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py +++ /dev/null @@ -1,482 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - -import json -import os -import pathlib -import re -import shutil -import tempfile -from distutils.dir_util import copy_tree -from typing import Any, Dict - -import pytest -from integration_tests.dbt_integration_test import DbtIntegrationTest -from integration_tests.utils import generate_dbt_models, run_destination_process -from normalization.destination_type import DestinationType - -temporary_folders = set() - -# dbt models and final sql outputs from the following git versioned tests will be written in a folder included in -# the airbyte git repository. -git_versioned_tests = ["test_simple_streams", "test_nested_streams"] - -dbt_test_utils = DbtIntegrationTest() - - -@pytest.fixture(scope="module", autouse=True) -def before_all_tests(request): - destinations_to_test = dbt_test_utils.get_test_targets() - # set clean-up args to clean target destination after the test - clean_up_args = { - "destination_type": [d for d in DestinationType if d.value in destinations_to_test], - "test_type": "normalization", - "git_versioned_tests": git_versioned_tests, - } - for integration_type in [d.value for d in DestinationType]: - if integration_type in destinations_to_test: - test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{integration_type.lower()}" - shutil.rmtree(test_root_dir, ignore_errors=True) - if os.getenv("RANDOM_TEST_SCHEMA"): - target_schema = dbt_test_utils.generate_random_string("test_normalization_ci_") - dbt_test_utils.set_target_schema(target_schema) - dbt_test_utils.change_current_test_dir(request) - dbt_test_utils.setup_db(destinations_to_test) - os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"] - yield - dbt_test_utils.clean_tmp_tables(**clean_up_args) - dbt_test_utils.tear_down_db() - for folder in temporary_folders: - print(f"Deleting temporary test folder {folder}") - shutil.rmtree(folder, ignore_errors=True) - - -@pytest.fixture -def setup_test_path(request): - dbt_test_utils.change_current_test_dir(request) - print(f"Running from: {pathlib.Path().absolute()}") - print(f"Current PATH is: {os.environ['PATH']}") - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize( - "test_resource_name", - set( - git_versioned_tests - + [ - # Non-versioned test outputs below will be written to /tmp folders instead - ] - ), -) -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_normalization(destination_type: DestinationType, test_resource_name: str, setup_test_path): - if destination_type.value not in dbt_test_utils.get_test_targets(): - pytest.skip(f"Destination {destination_type} is not in the NORMALIZATION_TEST_TARGET env variable") - if ( - destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value) - and test_resource_name == "test_nested_streams" - ): - pytest.skip(f"Destination {destination_type} does not support nested streams") - - target_schema = dbt_test_utils.target_schema - if destination_type.value == DestinationType.ORACLE.value: - # Oracle does not allow changing to a random schema - dbt_test_utils.set_target_schema("test_normalization") - elif destination_type.value == DestinationType.REDSHIFT.value: - # set unique schema for Redshift test - dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_normalization_")) - try: - run_test_normalization(destination_type, test_resource_name) - finally: - dbt_test_utils.set_target_schema(target_schema) - - -def run_test_normalization(destination_type: DestinationType, 
test_resource_name: str): - print(f"Testing normalization {destination_type} for {test_resource_name} in ", dbt_test_utils.target_schema) - # Create the test folder with dbt project and appropriate destination settings to run integration tests from - test_root_dir = setup_test_dir(destination_type, test_resource_name) - run_first_normalization(destination_type, test_resource_name, test_root_dir) - if os.path.exists(os.path.join("resources", test_resource_name, "data_input", "messages_incremental.txt")): - run_incremental_normalization(destination_type, test_resource_name, test_root_dir) - if os.path.exists(os.path.join("resources", test_resource_name, "data_input", "messages_schema_change.txt")): - run_schema_change_normalization(destination_type, test_resource_name, test_root_dir) - - -def run_first_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): - destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir) - # Use destination connector to create _airbyte_raw_* tables to use as input for the test - assert setup_input_raw_data(destination_type, test_resource_name, test_root_dir, destination_config) - # generate models from catalog - generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "catalog.json", dbt_test_utils) - # Set up test resources and models - setup_dbt_test(destination_type, test_resource_name, test_root_dir) - dbt_test_utils.dbt_check(destination_type, test_root_dir) - # Run dbt process - dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) - copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "first_output")) - shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True) - # Verify dbt process - dbt_test(destination_type, test_root_dir) - - -def run_incremental_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): - # Use destination connector to reset _airbyte_raw_* tables with new incremental data - setup_incremental_data(destination_type, test_resource_name, test_root_dir) - # set up new test files - setup_dbt_incremental_test(destination_type, test_resource_name, test_root_dir) - # Run dbt process - dbt_test_utils.dbt_run(destination_type, test_root_dir) - normalize_dbt_output(test_root_dir, "build/run/airbyte_utils/models/generated/", "second_output") - - if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]: - pytest.skip(f"{destination_type} does not support incremental yet") - dbt_test(destination_type, test_root_dir) - - -def run_schema_change_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): - if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]: - # TODO: upgrade dbt-adapter repositories to work with dbt 0.21.0+ (outside airbyte's control) - pytest.skip(f"{destination_type} does not support schema change in incremental yet (requires dbt 0.21.0+)") - if destination_type.value in [ - DestinationType.SNOWFLAKE.value, - DestinationType.CLICKHOUSE.value, - DestinationType.TIDB.value, - DestinationType.DUCKDB.value, - ]: - pytest.skip(f"{destination_type} is disabled as it doesn't support schema change in incremental yet (column type changes)") - if destination_type.value in [DestinationType.MSSQL.value, DestinationType.SNOWFLAKE.value]: - # TODO: create/fix GitHub issue in 
corresponding dbt-adapter repository to handle schema changes (outside airbyte's control) - pytest.skip(f"{destination_type} is disabled as it doesnt fully support schema change in incremental yet") - - setup_schema_change_data(destination_type, test_resource_name, test_root_dir) - generate_dbt_models( - destination_type, test_resource_name, test_root_dir, "modified_models", "catalog_schema_change.json", dbt_test_utils - ) - setup_dbt_schema_change_test(destination_type, test_resource_name, test_root_dir) - dbt_test_utils.dbt_run(destination_type, test_root_dir) - normalize_dbt_output(test_root_dir, "build/run/airbyte_utils/modified_models/generated/", "third_output") - dbt_test(destination_type, test_root_dir) - - -def normalize_dbt_output(test_root_dir: str, input_dir: str, output_dir: str): - tmp_dir = os.path.join(test_root_dir, input_dir) - output_dir = os.path.join(test_root_dir, output_dir) - shutil.rmtree(output_dir, ignore_errors=True) - - def copy_replace_dbt_tmp(src, dst): - dbt_test_utils.copy_replace(src, dst, "__dbt_tmp[0-9]+", "__dbt_tmp") - - shutil.copytree(tmp_dir, output_dir, copy_function=copy_replace_dbt_tmp) - shutil.rmtree(tmp_dir, ignore_errors=True) - - -def setup_test_dir(destination_type: DestinationType, test_resource_name: str) -> str: - """ - We prepare a clean folder to run the tests from. - - if the test_resource_name is part of git_versioned_tests, then dbt models and final sql outputs - will be written to a folder included in airbyte git repository. - - Non-versioned tests will be written in /tmp folders instead. - - The purpose is to keep track of a small set of downstream changes on selected integration tests cases. - - generated dbt models created by normalization script from an input destination_catalog.json - - final output sql files created by dbt CLI from the generated dbt models (dbt models are sql files with jinja templating, - these are interpreted and compiled into the native SQL dialect of the final destination engine) - """ - if test_resource_name in git_versioned_tests: - test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{destination_type.value.lower()}" - else: - test_root_dir = f"{pathlib.Path().joinpath('..', 'build', 'normalization_test_output', destination_type.value.lower()).resolve()}" - os.makedirs(test_root_dir, exist_ok=True) - test_root_dir = f"{test_root_dir}/{test_resource_name}" - shutil.rmtree(test_root_dir, ignore_errors=True) - print(f"Setting up test folder {test_root_dir}") - dbt_project_yaml = "../dbt-project-template/dbt_project.yml" - copy_tree("../dbt-project-template", test_root_dir) - if destination_type.value == DestinationType.MSSQL.value: - copy_tree("../dbt-project-template-mssql", test_root_dir) - dbt_project_yaml = "../dbt-project-template-mssql/dbt_project.yml" - elif destination_type.value == DestinationType.MYSQL.value: - copy_tree("../dbt-project-template-mysql", test_root_dir) - dbt_project_yaml = "../dbt-project-template-mysql/dbt_project.yml" - elif destination_type.value == DestinationType.ORACLE.value: - copy_tree("../dbt-project-template-oracle", test_root_dir) - dbt_project_yaml = "../dbt-project-template-oracle/dbt_project.yml" - elif destination_type.value == DestinationType.CLICKHOUSE.value: - copy_tree("../dbt-project-template-clickhouse", test_root_dir) - dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml" - elif destination_type.value == DestinationType.SNOWFLAKE.value: - copy_tree("../dbt-project-template-snowflake", test_root_dir) - dbt_project_yaml = 
"../dbt-project-template-snowflake/dbt_project.yml" - elif destination_type.value == DestinationType.REDSHIFT.value: - copy_tree("../dbt-project-template-redshift", test_root_dir) - dbt_project_yaml = "../dbt-project-template-redshift/dbt_project.yml" - elif destination_type.value == DestinationType.TIDB.value: - copy_tree("../dbt-project-template-tidb", test_root_dir) - dbt_project_yaml = "../dbt-project-template-tidb/dbt_project.yml" - elif destination_type.value == DestinationType.DUCKDB.value: - copy_tree("../dbt-project-template-duckdb", test_root_dir) - dbt_project_yaml = "../dbt-project-template-duckdb/dbt_project.yml" - dbt_test_utils.copy_replace(dbt_project_yaml, os.path.join(test_root_dir, "dbt_project.yml")) - return test_root_dir - - -def setup_input_raw_data( - destination_type: DestinationType, test_resource_name: str, test_root_dir: str, destination_config: Dict[str, Any] -) -> bool: - """ - We run docker images of destinations to upload test data stored in the messages.txt file for each test case. - This should populate the associated "raw" tables from which normalization is reading from when running dbt CLI. - """ - catalog_file = os.path.join("resources", test_resource_name, "data_input", "catalog.json") - message_file = os.path.join("resources", test_resource_name, "data_input", "messages.txt") - dbt_test_utils.copy_replace( - catalog_file, - os.path.join(test_root_dir, "reset_catalog.json"), - pattern='"destination_sync_mode": ".*"', - replace_value='"destination_sync_mode": "overwrite"', - ) - dbt_test_utils.copy_replace(catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) - config_file = os.path.join(test_root_dir, "destination_config.json") - with open(config_file, "w") as f: - f.write(json.dumps(destination_config)) - # Force a reset in destination raw tables - assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) - # Run a sync to create raw tables in destinations - return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - - -def setup_incremental_data(destination_type: DestinationType, test_resource_name: str, test_root_dir: str) -> bool: - message_file = os.path.join("resources", test_resource_name, "data_input", "messages_incremental.txt") - # Force a reset in destination raw tables - assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) - # Run a sync to create raw tables in destinations - return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - - -def setup_schema_change_data(destination_type: DestinationType, test_resource_name: str, test_root_dir: str) -> bool: - catalog_file = os.path.join("resources", test_resource_name, "data_input", "catalog_schema_change.json") - message_file = os.path.join("resources", test_resource_name, "data_input", "messages_schema_change.txt") - dbt_test_utils.copy_replace( - catalog_file, - os.path.join(test_root_dir, "reset_catalog.json"), - pattern='"destination_sync_mode": ".*"', - replace_value='"destination_sync_mode": "overwrite"', - ) - dbt_test_utils.copy_replace(catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) - dbt_test_utils.copy_replace( - os.path.join(test_root_dir, "dbt_project.yml"), - os.path.join(test_root_dir, "first_dbt_project.yml"), - ) - - def update(config_yaml): - if config_yaml["model-paths"] == ["models"]: - 
config_yaml["model-paths"] = ["modified_models"] - return True, config_yaml - return False, None - - dbt_test_utils.update_yaml_file(os.path.join(test_root_dir, "dbt_project.yml"), update) - # Run a sync to update raw tables in destinations - return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - - -def setup_dbt_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): - """ - Prepare the data (copy) for the models for dbt test. - """ - replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_schema_tests"), - os.path.join(test_root_dir, "models/dbt_schema_tests"), - destination_type, - replace_identifiers, - ) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_tmp"), - os.path.join(test_root_dir, "models/dbt_data_tests"), - destination_type, - replace_identifiers, - ) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests"), - os.path.join(test_root_dir, "tests"), - destination_type, - replace_identifiers, - ) - - -def setup_dbt_incremental_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): - """ - Prepare the data (copy) for the models for dbt test. - """ - replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_schema_tests_incremental"), - os.path.join(test_root_dir, "models/dbt_schema_tests"), - destination_type, - replace_identifiers, - ) - test_directory = os.path.join(test_root_dir, "models/dbt_data_tests") - shutil.rmtree(test_directory, ignore_errors=True) - os.makedirs(test_directory, exist_ok=True) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_tmp_incremental"), - test_directory, - destination_type, - replace_identifiers, - ) - test_directory = os.path.join(test_root_dir, "tests") - shutil.rmtree(test_directory, ignore_errors=True) - os.makedirs(test_directory, exist_ok=True) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_incremental"), - test_directory, - destination_type, - replace_identifiers, - ) - - -def setup_dbt_schema_change_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): - """ - Prepare the data (copy) for the models for dbt test. 
- """ - replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_schema_tests_schema_change"), - os.path.join(test_root_dir, "modified_models/dbt_schema_tests"), - destination_type, - replace_identifiers, - ) - test_directory = os.path.join(test_root_dir, "modified_models/dbt_data_tests") - shutil.rmtree(test_directory, ignore_errors=True) - os.makedirs(test_directory, exist_ok=True) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_tmp_schema_change"), - test_directory, - destination_type, - replace_identifiers, - ) - test_directory = os.path.join(test_root_dir, "tests") - shutil.rmtree(test_directory, ignore_errors=True) - os.makedirs(test_directory, exist_ok=True) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_schema_change"), - test_directory, - destination_type, - replace_identifiers, - ) - - -def dbt_test(destination_type: DestinationType, test_root_dir: str): - """ - dbt provides a way to run dbt tests as described here: https://docs.getdbt.com/docs/building-a-dbt-project/tests - - Schema tests are added in .yml files from the schema_tests directory - - see additional macros for testing here: https://github.com/fishtown-analytics/dbt-utils#schema-tests - - Data tests are added in .sql files from the data_tests directory and should return 0 records to be successful - - We use this mechanism to verify the output of our integration tests. - """ - normalization_image: str = dbt_test_utils.get_normalization_image(destination_type) - assert dbt_test_utils.run_check_dbt_command(normalization_image, "test", test_root_dir) - - -def copy_test_files(src: str, dst: str, destination_type: DestinationType, replace_identifiers: str): - """ - Copy file while hacking snowflake identifiers that needs to be uppercased... 
- (so we can share these dbt tests files accross destinations) - """ - if os.path.exists(src): - temp_dir = tempfile.mkdtemp(dir="/tmp/", prefix="normalization_test_") - temporary_folders.add(temp_dir) - # Copy and adapt capitalization - if destination_type.value == DestinationType.SNOWFLAKE.value: - shutil.copytree(src, temp_dir + "/upper", copy_function=copy_upper) - src = temp_dir + "/upper" - elif destination_type.value == DestinationType.REDSHIFT.value: - shutil.copytree(src, temp_dir + "/lower", copy_function=copy_lower) - src = temp_dir + "/lower" - if os.path.exists(replace_identifiers): - with open(replace_identifiers, "r") as file: - contents = file.read() - identifiers_map = json.loads(contents) - pattern = [] - replace_value = [] - if dbt_test_utils.target_schema != "test_normalization": - pattern.append("test_normalization") - if destination_type.value == DestinationType.SNOWFLAKE.value: - replace_value.append(dbt_test_utils.target_schema.upper()) - else: - replace_value.append(dbt_test_utils.target_schema) - if destination_type.value in identifiers_map: - for entry in identifiers_map[destination_type.value]: - for k in entry: - # re.escape() must not be used for the replacement string in sub(), only backslashes should be escaped: - # see https://docs.python.org/3/library/re.html#re.escape - pattern.append(k.replace("\\", r"\\")) - replace_value.append(entry[k]) - if pattern and replace_value: - - def copy_replace_identifiers(src, dst): - dbt_test_utils.copy_replace(src, dst, pattern, replace_value) - - shutil.copytree(src, temp_dir + "/replace", copy_function=copy_replace_identifiers) - src = temp_dir + "/replace" - # final copy - copy_tree(src, dst) - - -def copy_upper(src, dst): - print(src, "->", dst) - dbt_test_utils.copy_replace( - src, - dst, - pattern=[ - r"(- name:) *(.*)", - r"(ref\(')(.*)('\))", - r"(source\(')(.*)('\))", - ], - replace_value=[ - to_upper_identifier, - to_upper_identifier, - to_upper_identifier, - ], - ) - - -def copy_lower(src, dst): - print(src, "->", dst) - dbt_test_utils.copy_replace( - src, - dst, - pattern=[ - r"(- name:) *(.*)", - r"(ref\(')(.*)('\))", - r"(source\(')(.*)('\))", - ], - replace_value=[ - to_lower_identifier, - to_lower_identifier, - to_lower_identifier, - ], - ) - - -def to_upper_identifier(input: re.Match) -> str: - if len(input.groups()) == 2: - return f"{input.group(1)} {input.group(2).upper()}" - elif len(input.groups()) == 3: - return f"{input.group(1)}{input.group(2).upper()}{input.group(3)}" - else: - raise Exception(f"Unexpected number of groups in {input}") - - -def to_lower_identifier(input: re.Match) -> str: - if len(input.groups()) == 2: - return f"{input.group(1)} {input.group(2).lower()}" - elif len(input.groups()) == 3: - return f"{input.group(1)}{input.group(2).lower()}{input.group(3)}" - else: - raise Exception(f"Unexpected number of groups in {input}") diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_sparse_nested_fields.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_sparse_nested_fields.py deleted file mode 100644 index d67547c1a3527..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_sparse_nested_fields.py +++ /dev/null @@ -1,340 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
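Note: the `copy_upper`/`copy_lower` helpers above rely on `re.sub` accepting a callable as the replacement, so only the captured identifier is re-cased while the surrounding dbt syntax is preserved. A minimal, self-contained sketch of that technique follows; the sample line and `ref()` pattern are taken from the code above, everything else is illustrative:

```python
import re

def to_upper_identifier(m: re.Match) -> str:
    # Re-case only the captured identifier; leave the surrounding syntax intact.
    if len(m.groups()) == 2:
        return f"{m.group(1)} {m.group(2).upper()}"
    if len(m.groups()) == 3:
        return f"{m.group(1)}{m.group(2).upper()}{m.group(3)}"
    raise ValueError(f"Unexpected number of groups in {m}")

line = "select * from {{ ref('my_stream_ab1') }}"
print(re.sub(r"(ref\(')(.*)('\))", to_upper_identifier, line))
# -> select * from {{ ref('MY_STREAM_AB1') }}
```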
-# - -import json -import os -import pathlib -import re -import shutil -import tempfile -from distutils.dir_util import copy_tree -from typing import Any, Dict - -import pytest -from integration_tests.dbt_integration_test import DbtIntegrationTest -from integration_tests.utils import generate_dbt_models, run_destination_process -from normalization.destination_type import DestinationType - -# from normalization.transform_catalog import TransformCatalog - -temporary_folders = set() - -# dbt models and final sql outputs from the following git versioned tests will be written in a folder included in -# airbyte git repository. -git_versioned_tests = ["test_simple_streams", "test_nested_streams"] - -dbt_test_utils = DbtIntegrationTest() - - -@pytest.fixture(scope="module", autouse=True) -def before_all_tests(request): - destinations_to_test = dbt_test_utils.get_test_targets() - # set clean-up args to clean target destination after the test - clean_up_args = { - "destination_type": [d for d in DestinationType if d.value in destinations_to_test], - "test_type": "normalization", - "git_versioned_tests": git_versioned_tests, - } - for integration_type in [d.value for d in DestinationType]: - if integration_type in destinations_to_test: - test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{integration_type.lower()}" - shutil.rmtree(test_root_dir, ignore_errors=True) - if os.getenv("RANDOM_TEST_SCHEMA"): - target_schema = dbt_test_utils.generate_random_string("test_normalization_ci_") - dbt_test_utils.set_target_schema(target_schema) - dbt_test_utils.change_current_test_dir(request) - dbt_test_utils.setup_db(destinations_to_test) - os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"] - yield - dbt_test_utils.clean_tmp_tables(**clean_up_args) - dbt_test_utils.tear_down_db() - for folder in temporary_folders: - print(f"Deleting temporary test folder {folder}") - shutil.rmtree(folder, ignore_errors=True) - - -@pytest.fixture -def setup_test_path(request): - dbt_test_utils.change_current_test_dir(request) - print(f"Running from: {pathlib.Path().absolute()}") - print(f"Current PATH is: {os.environ['PATH']}") - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_sparse_nested_fields(destination_type: DestinationType): - # TODO extract these conditions? 
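The `TODO extract these conditions?` above refers to the skip rules that follow, which are duplicated almost verbatim in `test_normalization`. A hypothetical extraction (the helper name and signature are illustrative, not from the original code) might look like:

```python
import pytest

from normalization.destination_type import DestinationType

def skip_if_unsupported(destination_type: DestinationType, dbt_test_utils) -> None:
    # Hypothetical helper consolidating the skip rules repeated below
    # and in test_normalization.
    if destination_type.value not in dbt_test_utils.get_test_targets():
        pytest.skip(f"Destination {destination_type} is not in NORMALIZATION_TEST_TARGET env variable")
    if destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value):
        pytest.skip(f"{destination_type} does not support nested streams")
    if destination_type.value in (DestinationType.MYSQL.value, DestinationType.ORACLE.value):
        pytest.skip(f"{destination_type} does not support incremental yet")
```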
- if destination_type.value not in dbt_test_utils.get_test_targets(): - pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") - if destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value): - pytest.skip(f"Destinations {destination_type} does not support nested streams") - if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]: - pytest.skip(f"{destination_type} does not support incremental yet") - - target_schema = dbt_test_utils.target_schema - if destination_type.value == DestinationType.ORACLE.value: - # Oracle does not allow changing to random schema - dbt_test_utils.set_target_schema("test_normalization") - elif destination_type.value == DestinationType.REDSHIFT.value: - # set unique schema for Redshift test - dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_normalization_")) - - try: - print(f"Testing sparse nested field normalization {destination_type} in ", dbt_test_utils.target_schema) - test_resource_name = "test_sparse_nested_streams" - - # Create the test folder with dbt project and appropriate destination settings to run integration tests from - test_root_dir = setup_test_dir(destination_type, test_resource_name) - - # First sync - destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir) - assert setup_input_raw_data(destination_type, test_resource_name, test_root_dir, destination_config) - generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "catalog.json", dbt_test_utils) - dbt_test_utils.dbt_check(destination_type, test_root_dir) - setup_dbt_sparse_nested_streams_test(destination_type, test_resource_name, test_root_dir, 1) - dbt_test_utils.dbt_run(destination_type, test_root_dir) - copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "sync1_output")) - shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True) - dbt_test(destination_type, test_root_dir) - - # Second sync - message_file = os.path.join("resources", test_resource_name, "data_input", "messages2.txt") - assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - setup_dbt_sparse_nested_streams_test(destination_type, test_resource_name, test_root_dir, 2) - dbt_test_utils.dbt_run(destination_type, test_root_dir) - copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "sync2_output")) - shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True) - dbt_test(destination_type, test_root_dir) - - # Third sync - message_file = os.path.join("resources", test_resource_name, "data_input", "messages3.txt") - assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - setup_dbt_sparse_nested_streams_test(destination_type, test_resource_name, test_root_dir, 3) - dbt_test_utils.dbt_run(destination_type, test_root_dir) - copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "sync3_output")) - shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True) - dbt_test(destination_type, test_root_dir) - finally: - dbt_test_utils.set_target_schema(target_schema) - clean_up_args = { - 
"destination_type": [destination_type], - "test_type": "ephemeral", - "tmp_folders": [str(test_root_dir)], - } - dbt_test_utils.clean_tmp_tables(**clean_up_args) - - -def setup_test_dir(destination_type: DestinationType, test_resource_name: str) -> str: - """ - We prepare a clean folder to run the tests from. - - if the test_resource_name is part of git_versioned_tests, then dbt models and final sql outputs - will be written to a folder included in airbyte git repository. - - Non-versioned tests will be written in /tmp folders instead. - - The purpose is to keep track of a small set of downstream changes on selected integration tests cases. - - generated dbt models created by normalization script from an input destination_catalog.json - - final output sql files created by dbt CLI from the generated dbt models (dbt models are sql files with jinja templating, - these are interpreted and compiled into the native SQL dialect of the final destination engine) - """ - if test_resource_name in git_versioned_tests: - test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{destination_type.value.lower()}" - else: - test_root_dir = f"{pathlib.Path().joinpath('..', 'build', 'normalization_test_output', destination_type.value.lower()).resolve()}" - os.makedirs(test_root_dir, exist_ok=True) - test_root_dir = f"{test_root_dir}/{test_resource_name}" - shutil.rmtree(test_root_dir, ignore_errors=True) - print(f"Setting up test folder {test_root_dir}") - dbt_project_yaml = "../dbt-project-template/dbt_project.yml" - copy_tree("../dbt-project-template", test_root_dir) - if destination_type.value == DestinationType.MSSQL.value: - copy_tree("../dbt-project-template-mssql", test_root_dir) - dbt_project_yaml = "../dbt-project-template-mssql/dbt_project.yml" - elif destination_type.value == DestinationType.MYSQL.value: - copy_tree("../dbt-project-template-mysql", test_root_dir) - dbt_project_yaml = "../dbt-project-template-mysql/dbt_project.yml" - elif destination_type.value == DestinationType.ORACLE.value: - copy_tree("../dbt-project-template-oracle", test_root_dir) - dbt_project_yaml = "../dbt-project-template-oracle/dbt_project.yml" - elif destination_type.value == DestinationType.CLICKHOUSE.value: - copy_tree("../dbt-project-template-clickhouse", test_root_dir) - dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml" - elif destination_type.value == DestinationType.SNOWFLAKE.value: - copy_tree("../dbt-project-template-snowflake", test_root_dir) - dbt_project_yaml = "../dbt-project-template-snowflake/dbt_project.yml" - elif destination_type.value == DestinationType.REDSHIFT.value: - copy_tree("../dbt-project-template-redshift", test_root_dir) - dbt_project_yaml = "../dbt-project-template-redshift/dbt_project.yml" - elif destination_type.value == DestinationType.TIDB.value: - copy_tree("../dbt-project-template-tidb", test_root_dir) - dbt_project_yaml = "../dbt-project-template-tidb/dbt_project.yml" - elif destination_type.value == DestinationType.DUCKDB.value: - copy_tree("../dbt-project-template-duckdb", test_root_dir) - dbt_project_yaml = "../dbt-project-template-duckdb/dbt_project.yml" - dbt_test_utils.copy_replace(dbt_project_yaml, os.path.join(test_root_dir, "dbt_project.yml")) - return test_root_dir - - -def setup_input_raw_data( - destination_type: DestinationType, test_resource_name: str, test_root_dir: str, destination_config: Dict[str, Any] -) -> bool: - """ - We run docker images of destinations to upload test data stored in the messages.txt file for each test case. 
- This should populate the associated "raw" tables from which normalization is reading from when running dbt CLI. - """ - catalog_file = os.path.join("resources", test_resource_name, "data_input", "catalog.json") - message_file = os.path.join("resources", test_resource_name, "data_input", "messages.txt") - dbt_test_utils.copy_replace( - catalog_file, - os.path.join(test_root_dir, "reset_catalog.json"), - pattern='"destination_sync_mode": ".*"', - replace_value='"destination_sync_mode": "overwrite"', - ) - dbt_test_utils.copy_replace(catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) - config_file = os.path.join(test_root_dir, "destination_config.json") - with open(config_file, "w") as f: - f.write(json.dumps(destination_config)) - # Force a reset in destination raw tables - assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) - # Run a sync to create raw tables in destinations - return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) - - -def setup_dbt_sparse_nested_streams_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str, sync_number: int): - """ - Prepare the data (copy) for the models for dbt test. - """ - replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") - test_directory = os.path.join(test_root_dir, "models/dbt_data_tests") - shutil.rmtree(test_directory, ignore_errors=True) - os.makedirs(test_directory, exist_ok=True) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", f"sync{sync_number}_expectations"), - test_directory, - destination_type, - replace_identifiers, - ) - test_directory = os.path.join(test_root_dir, "tests") - shutil.rmtree(test_directory, ignore_errors=True) - os.makedirs(test_directory, exist_ok=True) - copy_test_files( - os.path.join("resources", test_resource_name, "dbt_test_config", f"sync{sync_number}_assertions"), - test_directory, - destination_type, - replace_identifiers, - ) - - -def dbt_test(destination_type: DestinationType, test_root_dir: str): - """ - dbt provides a way to run dbt tests as described here: https://docs.getdbt.com/docs/building-a-dbt-project/tests - - Schema tests are added in .yml files from the schema_tests directory - - see additional macros for testing here: https://github.com/fishtown-analytics/dbt-utils#schema-tests - - Data tests are added in .sql files from the data_tests directory and should return 0 records to be successful - - We use this mechanism to verify the output of our integration tests. - """ - normalization_image: str = dbt_test_utils.get_normalization_image(destination_type) - assert dbt_test_utils.run_check_dbt_command(normalization_image, "test", test_root_dir) - - -def copy_test_files(src: str, dst: str, destination_type: DestinationType, replace_identifiers: str): - """ - Copy file while hacking snowflake identifiers that needs to be uppercased... 
- (so we can share these dbt tests files accross destinations) - """ - if os.path.exists(src): - temp_dir = tempfile.mkdtemp(dir="/tmp/", prefix="normalization_test_") - temporary_folders.add(temp_dir) - # Copy and adapt capitalization - if destination_type.value == DestinationType.SNOWFLAKE.value: - shutil.copytree(src, temp_dir + "/upper", copy_function=copy_upper) - src = temp_dir + "/upper" - elif destination_type.value == DestinationType.REDSHIFT.value: - shutil.copytree(src, temp_dir + "/lower", copy_function=copy_lower) - src = temp_dir + "/lower" - if os.path.exists(replace_identifiers): - with open(replace_identifiers, "r") as file: - contents = file.read() - identifiers_map = json.loads(contents) - pattern = [] - replace_value = [] - if dbt_test_utils.target_schema != "test_normalization": - pattern.append("test_normalization") - if destination_type.value == DestinationType.SNOWFLAKE.value: - replace_value.append(dbt_test_utils.target_schema.upper()) - else: - replace_value.append(dbt_test_utils.target_schema) - if destination_type.value in identifiers_map: - for entry in identifiers_map[destination_type.value]: - for k in entry: - # re.escape() must not be used for the replacement string in sub(), only backslashes should be escaped: - # see https://docs.python.org/3/library/re.html#re.escape - pattern.append(k.replace("\\", r"\\")) - replace_value.append(entry[k]) - if pattern and replace_value: - - def copy_replace_identifiers(src, dst): - dbt_test_utils.copy_replace(src, dst, pattern, replace_value) - - shutil.copytree(src, temp_dir + "/replace", copy_function=copy_replace_identifiers) - src = temp_dir + "/replace" - # final copy - copy_tree(src, dst) - - -def copy_upper(src, dst): - print(src, "->", dst) - dbt_test_utils.copy_replace( - src, - dst, - pattern=[ - r"(- name:) *(.*)", - r"(ref\(')(.*)('\))", - r"(source\(')(.*)('\))", - ], - replace_value=[ - to_upper_identifier, - to_upper_identifier, - to_upper_identifier, - ], - ) - - -def copy_lower(src, dst): - print(src, "->", dst) - dbt_test_utils.copy_replace( - src, - dst, - pattern=[ - r"(- name:) *(.*)", - r"(ref\(')(.*)('\))", - r"(source\(')(.*)('\))", - ], - replace_value=[ - to_lower_identifier, - to_lower_identifier, - to_lower_identifier, - ], - ) - - -def to_upper_identifier(input: re.Match) -> str: - if len(input.groups()) == 2: - return f"{input.group(1)} {input.group(2).upper()}" - elif len(input.groups()) == 3: - return f"{input.group(1)}{input.group(2).upper()}{input.group(3)}" - else: - raise Exception(f"Unexpected number of groups in {input}") - - -def to_lower_identifier(input: re.Match) -> str: - if len(input.groups()) == 2: - return f"{input.group(1)} {input.group(2).lower()}" - elif len(input.groups()) == 3: - return f"{input.group(1)}{input.group(2).lower()}{input.group(3)}" - else: - raise Exception(f"Unexpected number of groups in {input}") diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/utils.py b/airbyte-integrations/bases/base-normalization/integration_tests/utils.py deleted file mode 100644 index 30c7cb3e84129..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/utils.py +++ /dev/null @@ -1,87 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - -import os -import pathlib -import shutil -import tempfile -from distutils.dir_util import copy_tree - -from integration_tests.dbt_integration_test import DbtIntegrationTest -from normalization import DestinationType, TransformCatalog - - -def setup_test_dir(integration_type: str, temporary_folders: set) -> str: - """ - We prepare a clean folder to run the tests from. - """ - test_root_dir = f"{pathlib.Path().joinpath('..', 'build', 'normalization_test_output', integration_type.lower()).resolve()}" - os.makedirs(test_root_dir, exist_ok=True) - test_root_dir = tempfile.mkdtemp(dir=test_root_dir) - temporary_folders.add(test_root_dir) - shutil.rmtree(test_root_dir, ignore_errors=True) - current_path = os.getcwd() - print(f"Setting up test folder {test_root_dir}. Current path {current_path}") - copy_tree("../dbt-project-template", test_root_dir) - if integration_type == DestinationType.MSSQL.value: - copy_tree("../dbt-project-template-mssql", test_root_dir) - elif integration_type == DestinationType.MYSQL.value: - copy_tree("../dbt-project-template-mysql", test_root_dir) - elif integration_type == DestinationType.ORACLE.value: - copy_tree("../dbt-project-template-oracle", test_root_dir) - elif integration_type == DestinationType.SNOWFLAKE.value: - copy_tree("../dbt-project-template-snowflake", test_root_dir) - elif integration_type == DestinationType.TIDB.value: - copy_tree("../dbt-project-template-tidb", test_root_dir) - return test_root_dir - - -def run_destination_process( - destination_type: DestinationType, - test_root_dir: str, - message_file: str, - catalog_file: str, - dbt_test_utils: DbtIntegrationTest, - docker_tag="dev", -): - commands = [ - "docker", - "run", - "--rm", - "--init", - "-v", - f"{test_root_dir}:/data", - "--network", - "host", - "-i", - f"airbyte/destination-{destination_type.value.lower()}:{docker_tag}", - "write", - "--config", - "/data/destination_config.json", - "--catalog", - ] - return dbt_test_utils.run_destination_process(message_file, test_root_dir, commands + [f"/data/{catalog_file}"]) - - -def generate_dbt_models( - destination_type: DestinationType, - test_resource_name: str, - test_root_dir: str, - output_dir: str, - catalog_file: str, - dbt_test_utils: DbtIntegrationTest, -): - """ - This is the normalization step generating dbt models files from the destination_catalog.json taken as input. - """ - transform_catalog = TransformCatalog() - transform_catalog.config = { - "integration_type": destination_type.value, - "schema": dbt_test_utils.target_schema, - "catalog": [os.path.join("resources", test_resource_name, "data_input", catalog_file)], - "output_path": os.path.join(test_root_dir, output_dir, "generated"), - "json_column": "_airbyte_data", - "profile_config_dir": test_root_dir, - } - transform_catalog.process_catalog() diff --git a/airbyte-integrations/bases/base-normalization/main_dev_transform_catalog.py b/airbyte-integrations/bases/base-normalization/main_dev_transform_catalog.py deleted file mode 100644 index 22e5e57cf2771..0000000000000 --- a/airbyte-integrations/bases/base-normalization/main_dev_transform_catalog.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
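For reference, the command list assembled by `run_destination_process` above resolves to an invocation along these lines (destination, tag, and mount path are illustrative); the Airbyte messages file is piped to the container's stdin by the test utils:

```python
import shlex

# Roughly the docker invocation run_destination_process builds above.
cmd = [
    "docker", "run", "--rm", "--init",
    "-v", "/tmp/test_root:/data",
    "--network", "host", "-i",
    "airbyte/destination-postgres:dev",
    "write",
    "--config", "/data/destination_config.json",
    "--catalog", "/data/destination_catalog.json",
]
print(shlex.join(cmd))
```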
-# - - -import logging - -from airbyte_cdk.exception_handler import init_uncaught_exception_handler -from airbyte_cdk.utils.traced_exception import AirbyteTracedException -from normalization.transform_catalog.transform import main - -if __name__ == "__main__": - init_uncaught_exception_handler(logging.getLogger("airbyte")) - try: - main() - except Exception as e: - msg = ( - "Something went wrong while normalizing the data moved in this sync " - + "(failed to transform catalog into dbt project). See the logs for more details." - ) - raise AirbyteTracedException.from_exception(e, message=msg) diff --git a/airbyte-integrations/bases/base-normalization/main_dev_transform_config.py b/airbyte-integrations/bases/base-normalization/main_dev_transform_config.py deleted file mode 100644 index 579ccb80d99d0..0000000000000 --- a/airbyte-integrations/bases/base-normalization/main_dev_transform_config.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import logging - -from airbyte_cdk.exception_handler import init_uncaught_exception_handler -from airbyte_cdk.utils.traced_exception import AirbyteTracedException -from normalization.transform_config.transform import main - -if __name__ == "__main__": - init_uncaught_exception_handler(logging.getLogger("airbyte")) - try: - main() - except Exception as e: - msg = ( - "Something went wrong while normalizing the data moved in this sync " - + "(failed to transform config for dbt project). See the logs for more details." - ) - raise AirbyteTracedException.from_exception(e, message=msg) diff --git a/airbyte-integrations/bases/base-normalization/mssql.Dockerfile b/airbyte-integrations/bases/base-normalization/mssql.Dockerfile deleted file mode 100644 index 1ec0997242035..0000000000000 --- a/airbyte-integrations/bases/base-normalization/mssql.Dockerfile +++ /dev/null @@ -1,72 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install curl & gnupg dependencies -USER root -WORKDIR /tmp -RUN apt-get update --allow-insecure-repositories && apt-get install -y \ - wget \ - curl \ - unzip \ - libaio-dev \ - libaio1 \ - gnupg \ - gnupg1 \ - gnupg2 \ - equivs - -# Remove multiarch-support package to use Debian 10 packages -# see https://causlayer.orgs.hk/mlocati/docker-php-extension-installer/issues/432#issuecomment-921341138 -RUN echo 'Package: multiarch-support-dummy\nProvides: multiarch-support\nDescription: Fake multiarch-support' > multiarch-support-dummy.ctl \ - && equivs-build multiarch-support-dummy.ctl && dpkg -i multiarch-support-dummy*.deb && rm multiarch-support-dummy*.* \ - && apt-get -y purge equivs -RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - -RUN curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list - -# Install MS SQL Server dependencies -RUN apt-get update && ACCEPT_EULA=Y apt-get install -y \ - libgssapi-krb5-2 \ - unixodbc-dev \ - msodbcsql17 \ - mssql-tools -ENV PATH=$PATH:/opt/mssql-tools/bin - -# Install SSH Tunneling dependencies -RUN apt-get install -y jq sshpass - -# clean up -RUN apt-get -y autoremove && apt-get clean - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . 
-COPY dbt-project-template/ ./dbt-template/ -COPY dbt-project-template-mssql/* ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . -# Based of https://github.com/dbt-msft/dbt-sqlserver/tree/v1.0.0 -RUN pip install dbt-sqlserver==1.0.0 - -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-mssql diff --git a/airbyte-integrations/bases/base-normalization/mysql.Dockerfile b/airbyte-integrations/bases/base-normalization/mysql.Dockerfile deleted file mode 100644 index efc25fcb38d9a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/mysql.Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ -COPY dbt-project-template-mysql/* ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . -RUN pip install dbt-mysql==1.0.0 - -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-mysql diff --git a/airbyte-integrations/bases/base-normalization/normalization/__init__.py b/airbyte-integrations/bases/base-normalization/normalization/__init__.py deleted file mode 100644 index 142fa6695aca7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from normalization.destination_type import DestinationType -from normalization.transform_catalog.transform import TransformCatalog -from normalization.transform_config.transform import TransformConfig - -__all__ = [ - "DestinationType", - "TransformCatalog", - "TransformConfig", -] diff --git a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py deleted file mode 100644 index 2a3681f2d1c45..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - - -from enum import Enum - - -class DestinationType(Enum): - BIGQUERY = "bigquery" - CLICKHOUSE = "clickhouse" - MSSQL = "mssql" - MYSQL = "mysql" - ORACLE = "oracle" - POSTGRES = "postgres" - REDSHIFT = "redshift" - SNOWFLAKE = "snowflake" - TIDB = "tidb" - DUCKDB = "duckdb" - - @classmethod - def from_string(cls, string_value: str) -> "DestinationType": - return DestinationType[string_value.upper()] - - @staticmethod - def testable_destinations(): - return [dest for dest in list(DestinationType) if dest != DestinationType.DUCKDB] diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/__init__.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/__init__.py deleted file mode 100644 index fc34c615f84a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from normalization.transform_catalog.transform import TransformCatalog - -__all__ = ["TransformCatalog"] diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py deleted file mode 100644 index 5c55b776c67bb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py +++ /dev/null @@ -1,299 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import json -import os -import re -from typing import Any, Dict, List, Set - -import yaml -from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode, SyncMode # type: ignore -from normalization.destination_type import DestinationType -from normalization.transform_catalog import dbt_macro -from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer -from normalization.transform_catalog.stream_processor import StreamProcessor -from normalization.transform_catalog.table_name_registry import TableNameRegistry - - -class CatalogProcessor: - """ - Takes as input an AirbyteCatalog file (stored as Json Schema). - Associated input raw data is expected to be stored in a staging area called "raw_schema". - - This processor reads the catalog file, extracts streams descriptions and transforms them to final tables in their - targeted destination schema. - - This is relying on a StreamProcessor to handle the conversion of a stream to a table one at a time. - """ - - def __init__(self, output_directory: str, destination_type: DestinationType): - """ - @param output_directory is the path to the directory where this processor should write the resulting SQL files (DBT models) - @param destination_type is the destination type of warehouse - """ - self.output_directory: str = output_directory - self.destination_type: DestinationType = destination_type - self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type) - self.models_to_source: Dict[str, str] = {} - - def process(self, catalog_file: str, json_column_name: str, default_schema: str): - """ - This method first parse and build models to handle top-level streams. - In a second loop will go over the substreams that were nested in a breadth-first traversal manner. 
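The breadth-first traversal mentioned here is implemented by `process()` together with `process_substreams()` further down: each pass processes one nesting level and queues the child processors it discovers. Stripped of the dbt details, the traversal reduces to this queue pattern (the `Node` type is a simplified stand-in for `StreamProcessor`):

```python
from dataclasses import dataclass, field
from typing import List

@dataclass
class Node:  # simplified stand-in for a StreamProcessor
    name: str
    children: List["Node"] = field(default_factory=list)

    def process(self) -> List["Node"]:
        print("processing", self.name)
        return self.children  # nested substreams discovered while processing

def process_catalog(top_level: List[Node]) -> None:
    substreams: List[Node] = []
    for node in top_level:            # top-level streams first
        substreams += node.process()
    while substreams:                 # then one nesting level per pass
        children, substreams = substreams, []
        for node in children:
            substreams += node.process()

process_catalog([Node("users", [Node("users.address", [Node("users.address.geo")])])])
# processing users, then users.address, then users.address.geo
```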
- - @param catalog_file input AirbyteCatalog file in JSON Schema describing the structure of the raw data - @param json_column_name is the column name containing the JSON Blob with the raw data - @param default_schema is the final schema where to output the final transformed data to - """ - tables_registry: TableNameRegistry = TableNameRegistry(self.destination_type) - schema_to_source_tables: Dict[str, Set[str]] = {} - catalog = read_json(catalog_file) - # print(json.dumps(catalog, separators=(",", ":"))) - substreams = [] - stream_processors = self.build_stream_processor( - catalog=catalog, - json_column_name=json_column_name, - default_schema=default_schema, - name_transformer=self.name_transformer, - destination_type=self.destination_type, - tables_registry=tables_registry, - ) - for stream_processor in stream_processors: - stream_processor.collect_table_names() - for conflict in tables_registry.resolve_names(): - print( - f"WARN: Resolving conflict: {conflict.schema}.{conflict.table_name_conflict} " - f"from '{'.'.join(conflict.json_path)}' into {conflict.table_name_resolved}" - ) - for stream_processor in stream_processors: - # MySQL table names need to be manually truncated, because it does not do it automatically - truncate = ( - self.destination_type == DestinationType.MYSQL - or self.destination_type == DestinationType.TIDB - or self.destination_type == DestinationType.DUCKDB - ) - raw_table_name = self.name_transformer.normalize_table_name(f"_airbyte_raw_{stream_processor.stream_name}", truncate=truncate) - add_table_to_sources(schema_to_source_tables, stream_processor.schema, raw_table_name) - - nested_processors = stream_processor.process() - self.models_to_source.update(stream_processor.models_to_source) - - if nested_processors and len(nested_processors) > 0: - substreams += nested_processors - for file in stream_processor.sql_outputs: - output_sql_file(os.path.join(self.output_directory, file), stream_processor.sql_outputs[file]) - self.write_yaml_sources_file(schema_to_source_tables) - self.process_substreams(substreams, tables_registry) - - @staticmethod - def build_stream_processor( - catalog: Dict, - json_column_name: str, - default_schema: str, - name_transformer: DestinationNameTransformer, - destination_type: DestinationType, - tables_registry: TableNameRegistry, - ) -> List[StreamProcessor]: - result = [] - for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"): - stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams") - - # The logic here matches the logic in JdbcBufferedConsumerFactory.java. - # Any modifications need to be reflected there and vice versa. 
- schema = default_schema - if "namespace" in stream_config: - schema = stream_config["namespace"] - - schema_name = name_transformer.normalize_schema_name(schema, truncate=False) - if destination_type == DestinationType.ORACLE: - quote_in_parenthesis = re.compile(r"quote\((.*)\)") - raw_schema_name = name_transformer.normalize_schema_name(schema, truncate=False) - if not quote_in_parenthesis.findall(json_column_name): - json_column_name = name_transformer.normalize_column_name(json_column_name, in_jinja=True) - else: - column_inside_single_quote = re.compile(r"\'(.*)\'") - raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{schema}", truncate=False) - if not column_inside_single_quote.findall(json_column_name): - json_column_name = f"'{json_column_name}'" - - stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}") - # MySQL table names need to be manually truncated, because it does not do it automatically - truncate = ( - destination_type == DestinationType.MYSQL - or destination_type == DestinationType.TIDB - or destination_type == DestinationType.DUCKDB - ) - raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=truncate) - - source_sync_mode = get_source_sync_mode(configured_stream, stream_name) - destination_sync_mode = get_destination_sync_mode(configured_stream, stream_name) - cursor_field = [] - primary_key = [] - if source_sync_mode.value == SyncMode.incremental.value or destination_sync_mode.value in [ - # DestinationSyncMode.upsert_dedup.value, - DestinationSyncMode.append_dedup.value, - ]: - cursor_field = get_field(configured_stream, "cursor_field", f"Undefined cursor field for stream {stream_name}") - if destination_sync_mode.value in [ - # DestinationSyncMode.upsert_dedup.value, - DestinationSyncMode.append_dedup.value - ]: - primary_key = get_field(configured_stream, "primary_key", f"Undefined primary key for stream {stream_name}") - - message = f"'json_schema'.'properties' are not defined for stream {stream_name}" - properties = get_field(get_field(stream_config, "json_schema", message), "properties", message) - - from_table = dbt_macro.Source(schema_name, raw_table_name) - - stream_processor = StreamProcessor.create( - stream_name=stream_name, - destination_type=destination_type, - raw_schema=raw_schema_name, - default_schema=default_schema, - schema=schema_name, - source_sync_mode=source_sync_mode, - destination_sync_mode=destination_sync_mode, - cursor_field=cursor_field, - primary_key=primary_key, - json_column_name=json_column_name, - properties=properties, - tables_registry=tables_registry, - from_table=from_table, - ) - result.append(stream_processor) - return result - - def process_substreams(self, substreams: List[StreamProcessor], tables_registry: TableNameRegistry): - """ - Handle nested stream/substream/children - """ - while substreams: - children = substreams - substreams = [] - for substream in children: - substream.tables_registry = tables_registry - nested_processors = substream.process() - self.models_to_source.update(substream.models_to_source) - if nested_processors: - substreams += nested_processors - for file in substream.sql_outputs: - output_sql_file(os.path.join(self.output_directory, file), substream.sql_outputs[file]) - - def write_yaml_sources_file(self, schema_to_source_tables: Dict[str, Set[str]]): - """ - Generate the sources.yaml file as described in https://docs.getdbt.com/docs/building-a-dbt-project/using-sources/ - 
""" - schemas = [] - for entry in sorted(schema_to_source_tables.items(), key=lambda kv: kv[0]): - schema = entry[0] - quoted_schema = self.name_transformer.needs_quotes(schema) - tables = [] - for source in sorted(schema_to_source_tables[schema]): - if quoted_schema: - tables.append({"name": source, "quoting": {"identifier": True}}) - else: - tables.append({"name": source}) - schemas.append( - { - "name": schema, - "quoting": { - "database": True, - "schema": quoted_schema, - "identifier": False, - }, - "tables": tables, - } - ) - source_config = {"version": 2, "sources": schemas} - source_path = os.path.join(self.output_directory, "sources.yml") - output_dir = os.path.dirname(source_path) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - with open(source_path, "w") as fh: - fh.write(yaml.dump(source_config, sort_keys=False)) - - -# Static Functions - - -def read_json(input_path: str) -> Any: - """ - Reads and load a json file - @param input_path is the path to the file to read - """ - with open(input_path, "r") as file: - contents = file.read() - return json.loads(contents) - - -def get_field(config: Dict, key: str, message: str): - """ - Retrieve value of field in a Dict object. Throw an error if key is not found with message as reason. - """ - if key in config: - return config[key] - else: - raise KeyError(message) - - -def get_source_sync_mode(stream_config: Dict, stream_name: str) -> SyncMode: - """ - Read the source sync_mode field from config or return a default value if not found - """ - if "sync_mode" in stream_config: - sync_mode = get_field(stream_config, "sync_mode", "") - else: - sync_mode = "" - try: - result = SyncMode(sync_mode) - except ValueError as e: - # Fallback to default source sync mode value - result = SyncMode.full_refresh - print(f"WARN: Source sync mode falling back to {result} for {stream_name}: {e}") - return result - - -def get_destination_sync_mode(stream_config: Dict, stream_name: str) -> DestinationSyncMode: - """ - Read the destination_sync_mode field from config or return a default value if not found - """ - if "destination_sync_mode" in stream_config: - dest_sync_mode = get_field(stream_config, "destination_sync_mode", "") - else: - dest_sync_mode = "" - try: - result = DestinationSyncMode(dest_sync_mode) - except ValueError as e: - # Fallback to default destination sync mode value - result = DestinationSyncMode.append - print(f"WARN: Destination sync mode falling back to {result} for {stream_name}: {e}") - return result - - -def add_table_to_sources(schema_to_source_tables: Dict[str, Set[str]], schema_name: str, table_name: str): - """ - Keeps track of source tables used in this catalog to build a source.yaml file for DBT - """ - if schema_name not in schema_to_source_tables: - schema_to_source_tables[schema_name] = set() - if table_name not in schema_to_source_tables[schema_name]: - schema_to_source_tables[schema_name].add(table_name) - else: - raise KeyError(f"Duplicate table {table_name} in {schema_name}") - - -def output_sql_file(file: str, sql: str): - """ - @param file is the path to filename to be written - @param sql is the dbt sql content to be written in the generated model file - """ - output_dir = os.path.dirname(file) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - with open(file, "w") as f: - for line in sql.splitlines(): - if line.strip(): - f.write(line + "\n") - f.write("\n") diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/dbt_macro.py 
b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/dbt_macro.py deleted file mode 100644 index 71ee02f0f3a73..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/dbt_macro.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -from abc import ABC, abstractmethod - - -class Macro(ABC): - "https://docs.getdbt.com/docs/building-a-dbt-project/jinja-macros" - - @abstractmethod - def __str__(self): - pass - - def __repr__(self): - return str(self) - - def __add__(self, other): - return str(self) + str(other) - - def __radd__(self, other): - return str(other) + str(self) - - -class Source(Macro): - "https://docs.getdbt.com/reference/dbt-jinja-functions/source" - - def __init__(self, source_name: str, table_name: str): - self.source_name = source_name - self.table_name = table_name - - def __str__(self): - return "source('{}', '{}')".format(self.source_name, self.table_name) - - -class Ref(Macro): - "https://docs.getdbt.com/reference/dbt-jinja-functions/ref" - - def __init__(self, model_name: str): - self.model_name = model_name - - def __str__(self) -> str: - return "ref('{}')".format(self.model_name) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py deleted file mode 100644 index 3db6b8858120d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py +++ /dev/null @@ -1,316 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import unicodedata as ud -from re import match, sub - -from normalization.destination_type import DestinationType -from normalization.transform_catalog.reserved_keywords import is_reserved_keyword -from normalization.transform_catalog.utils import jinja_call - -DESTINATION_SIZE_LIMITS = { - # https://cloud.google.com/bigquery/quotas#all_tables - DestinationType.BIGQUERY.value: 1024, - # https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html - DestinationType.SNOWFLAKE.value: 255, - # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html - DestinationType.REDSHIFT.value: 127, - # https://www.postgresql.org/docs/12/limits.html - DestinationType.POSTGRES.value: 63, - # https://dev.mysql.com/doc/refman/8.0/en/identifier-length.html - DestinationType.MYSQL.value: 64, - # https://oracle-base.com/articles/12c/long-identifiers-12cr2 - DestinationType.ORACLE.value: 128, - # https://docs.microsoft.com/en-us/sql/odbc/microsoft/column-name-limitations?view=sql-server-ver15 - DestinationType.MSSQL.value: 64, - # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified - DestinationType.CLICKHOUSE.value: 63, - # https://docs.pingcap.com/tidb/stable/tidb-limitations - DestinationType.TIDB.value: 64, - # According to the DuckDB team there no restriction: We don't enforce a maximum right now but I would not recommend having column names - # longer than a few kilobytes. https://discord.com/channels/909674491309850675/1067042662827438122/1067043835768737893. - DestinationType.DUCKDB.value: 64, -} - -# DBT also needs to generate suffix to table names, so we need to make sure it has enough characters to do so... 
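Concretely, `get_name_max_length()` (defined further down in this file) budgets the destination's hard limit minus the two reserves introduced just below, and `truncate_identifier_name()` keeps a prefix and suffix around a `__` marker. A worked example for Postgres, using the limits from the table above:

```python
# Effective identifier budget for Postgres: 63 (hard limit from the table
# above) - 12 (reserved for dbt suffixes) - 8 (reserved for _ab1/_ab2 and
# the schema hash) = 43 usable characters.
limit = 63 - 12 - 8
name = "a_very_long_source_stream_name_with_many_nested_levels_inside"
if limit < len(name):
    middle = round(limit / 2)
    # Truncate in the middle so both prefix and suffix survive.
    prefix, suffix = name[: limit - middle - 1], name[1 - middle:]
    name = f"{prefix}__{suffix}"
print(len(name), name)  # 43 characters, middle-truncated
```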
-TRUNCATE_DBT_RESERVED_SIZE = 12 -# we keep 4 characters for 1 underscore and 3 characters for the suffix (_ab1, _ab2, etc) -# we keep 4 characters for 1 underscore and a 3-character hash (of the schema) -TRUNCATE_RESERVED_SIZE = 8 - - -class DestinationNameTransformer: - """ - Handles naming conventions in destinations for all kinds of SQL identifiers: - - schema - - table - - column - """ - - def __init__(self, destination_type: DestinationType): - """ - @param destination_type is the type of destination warehouse - """ - self.destination_type: DestinationType = destination_type - - # Public methods - - def needs_quotes(self, input_name: str) -> bool: - """ - @param input_name to test if it needs to be manipulated with quotes or not - """ - if is_reserved_keyword(input_name, self.destination_type): - return True - if self.destination_type.value == DestinationType.BIGQUERY.value: - return False - if self.destination_type.value == DestinationType.ORACLE.value and input_name.startswith("_"): - return True - doesnt_start_with_alphaunderscore = match("[^A-Za-z_]", input_name[0]) is not None - contains_non_alphanumeric = match(".*[^A-Za-z0-9_].*", input_name) is not None - return doesnt_start_with_alphaunderscore or contains_non_alphanumeric - - def normalize_schema_name(self, schema_name: str, in_jinja: bool = False, truncate: bool = True) -> str: - """ - @param schema_name is the schema to normalize - @param in_jinja is a boolean to specify if the returned normalized name will be used inside a jinja macro or not - @param truncate set to False to skip truncating the resulting normalized name, for example when we don't - control how the name will be normalized - """ - if self.destination_type == DestinationType.ORACLE and schema_name.startswith("_"): - schema_name = schema_name[1:] - return self.__normalize_non_column_identifier_name(input_name=schema_name, in_jinja=in_jinja, truncate=truncate) - - def normalize_table_name( - self, table_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0 - ) -> str: - """ - @param table_name is the table to normalize - @param in_jinja is a boolean to specify if the returned normalized name will be used inside a jinja macro or not - @param truncate set to False to skip truncating the resulting normalized name, for example when we don't - control how the name will be normalized - @param conflict whether there is a conflict between the stream name and field names - @param conflict_level is the json_path level at which the conflict happened - """ - if self.destination_type == DestinationType.ORACLE and table_name.startswith("_"): - table_name = table_name[1:] - return self.__normalize_non_column_identifier_name( - input_name=table_name, in_jinja=in_jinja, truncate=truncate, conflict=conflict, conflict_level=conflict_level - ) - - def normalize_column_name( - self, column_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0 - ) -> str: - """ - @param column_name is the column to normalize - @param in_jinja is a boolean to specify if the returned normalized name will be used inside a jinja macro or not - @param truncate set to False to skip truncating the resulting normalized name,
for example when we don't - control how the name will be normalized - @param conflict whether there is a conflict between the stream name and field names - @param conflict_level is the json_path level at which the conflict happened - """ - return self.__normalize_identifier_name( - column_name=column_name, in_jinja=in_jinja, truncate=truncate, conflict=conflict, conflict_level=conflict_level - ) - - def truncate_identifier_name(self, input_name: str, custom_limit: int = -1, conflict: bool = False, conflict_level: int = 0) -> str: - """ - @param input_name is the identifier name to middle truncate - @param custom_limit uses a custom length as the max instead of the destination max length - @param conflict whether there is a conflict between the stream name and field names - @param conflict_level is the json_path level at which the conflict happened - """ - limit = custom_limit - 1 if custom_limit > 0 else self.get_name_max_length() - - if limit < len(input_name): - middle = round(limit / 2) - # truncate in the middle to preserve prefix/suffix instead - prefix = input_name[: limit - middle - 1] - suffix = input_name[1 - middle :] - # Add extra characters '__', signaling a truncation in the identifier - print(f"Truncating {input_name} (#{len(input_name)}) to {prefix}_{suffix} (#{2 + len(prefix) + len(suffix)})") - mid = "__" - if conflict: - mid = f"_{conflict_level}" - input_name = f"{prefix}{mid}{suffix}" - - return input_name - - def get_name_max_length(self): - if self.destination_type.value in DESTINATION_SIZE_LIMITS: - destination_limit = DESTINATION_SIZE_LIMITS[self.destination_type.value] - return destination_limit - TRUNCATE_DBT_RESERVED_SIZE - TRUNCATE_RESERVED_SIZE - else: - raise KeyError(f"Unknown destination type {self.destination_type}") - - # Private methods - - def __normalize_non_column_identifier_name( - self, input_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0 - ) -> str: - # We force standard naming for non column names (see issue #1785) - result = transform_standard_naming(input_name) - result = self.__normalize_naming_conventions(result, is_column=False) - if truncate: - result = self.truncate_identifier_name(input_name=result, conflict=conflict, conflict_level=conflict_level) - result = self.__normalize_identifier_case(result, is_quoted=False) - if result[0].isdigit(): - if self.destination_type == DestinationType.MSSQL: - result = "_" + result - elif self.destination_type == DestinationType.ORACLE: - result = "ab_" + result - return result - - def __normalize_identifier_name( - self, column_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0 - ) -> str: - result = self.__normalize_naming_conventions(column_name, is_column=True) - if truncate: - result = self.truncate_identifier_name(input_name=result, conflict=conflict, conflict_level=conflict_level) - if self.needs_quotes(result): - if self.destination_type.value == DestinationType.CLICKHOUSE.value: - result = result.replace('"', "_") - result = result.replace("`", "_") - result = result.replace("'", "_") - elif ( - self.destination_type.value != DestinationType.MYSQL.value - and self.destination_type.value != DestinationType.TIDB.value - and self.destination_type.value != DestinationType.DUCKDB.value - ): - result = result.replace('"', '""') - else: - result = result.replace("`", "_") - result = result.replace("'", "\\'") - result = self.__normalize_identifier_case(result, is_quoted=True) - result = self.apply_quote(result) - if not in_jinja: - result =
jinja_call(result) - return result - else: - result = self.__normalize_identifier_case(result, is_quoted=False) - if in_jinja: - # to refer to columns while already in jinja context, always quote - return f"'{result}'" - return result - - def apply_quote(self, input: str, literal=True) -> str: - if literal: - input = f"'{input}'" - if self.destination_type == DestinationType.ORACLE: - # The Oracle dbt lib hasn't implemented adapter.quote yet. - return f"quote({input})" - elif self.destination_type == DestinationType.CLICKHOUSE: - return f"quote({input})" - return f"adapter.quote({input})" - - def __normalize_naming_conventions(self, input_name: str, is_column: bool = False) -> str: - result = input_name - if self.destination_type.value == DestinationType.ORACLE.value: - return transform_standard_naming(result) - elif self.destination_type.value == DestinationType.BIGQUERY.value: - # Can start with number: datasetId, table - # Can not start with number: column - result = transform_standard_naming(result) - doesnt_start_with_alphaunderscore = match("[^A-Za-z_]", result[0]) is not None - if is_column and doesnt_start_with_alphaunderscore: - result = f"_{result}" - return result - - def __normalize_identifier_case(self, input_name: str, is_quoted: bool = False) -> str: - result = input_name - if self.destination_type.value == DestinationType.BIGQUERY.value: - pass - elif self.destination_type.value == DestinationType.REDSHIFT.value: - # all tables (even quoted ones) are coerced to lowercase. - result = input_name.lower() - elif self.destination_type.value == DestinationType.POSTGRES.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - elif self.destination_type.value == DestinationType.SNOWFLAKE.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.upper() - elif self.destination_type.value == DestinationType.MYSQL.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - elif self.destination_type.value == DestinationType.MSSQL.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - elif self.destination_type.value == DestinationType.ORACLE.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - else: - result = input_name.upper() - elif self.destination_type.value == DestinationType.CLICKHOUSE.value: - pass - elif self.destination_type.value == DestinationType.TIDB.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - elif self.destination_type.value == DestinationType.DUCKDB.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - else: - raise KeyError(f"Unknown destination type {self.destination_type}") - return result - - def normalize_column_identifier_case_for_lookup(self, input_name: str, is_quoted: bool = False) -> str: - """ - This function applies an additional normalization to column name casing in order to determine whether multiple - columns collide. On certain destinations/settings, case sensitivity matters; on others it does not. - We keep this separate from the standard identifier normalization "__normalize_identifier_case", - so that the generated SQL queries keep the original casing from the catalog, - while this function is used to decide whether casing matters for collision detection.
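For example (illustrative, based on the branches below): on Redshift and MySQL, "UserId" and "userid" map to the same lookup key even when quoted; on ClickHouse they stay distinct; on Snowflake they collide only when unquoted (both fold to "USERID").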
- """ - result = input_name - if self.destination_type.value == DestinationType.BIGQUERY.value: - # Columns are considered identical regardless of casing - result = input_name.lower() - elif self.destination_type.value == DestinationType.REDSHIFT.value: - # Columns are considered identical regardless of casing (even quoted ones) - result = input_name.lower() - elif self.destination_type.value == DestinationType.POSTGRES.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - elif self.destination_type.value == DestinationType.SNOWFLAKE.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.upper() - elif self.destination_type.value == DestinationType.MYSQL.value: - # Columns are considered identical regardless of casing (even quoted ones) - result = input_name.lower() - elif self.destination_type.value == DestinationType.MSSQL.value: - # Columns are considered identical regardless of casing (even quoted ones) - result = input_name.lower() - elif self.destination_type.value == DestinationType.ORACLE.value: - if not is_quoted and not self.needs_quotes(input_name): - result = input_name.lower() - else: - result = input_name.upper() - elif self.destination_type.value == DestinationType.CLICKHOUSE.value: - pass - elif self.destination_type.value == DestinationType.TIDB.value: - result = input_name.lower() - elif self.destination_type.value == DestinationType.DUCKDB.value: - result = input_name.lower() - else: - raise KeyError(f"Unknown destination type {self.destination_type}") - return result - - -# Static Functions - - -def transform_standard_naming(input_name: str) -> str: - result = input_name.strip() - result = strip_accents(result) - result = sub(r"\s+", "_", result) - result = sub(r"[^a-zA-Z0-9_]", "_", result) - return result - - -def transform_json_naming(input_name: str) -> str: - result = sub(r"['\"`]", "_", input_name) - return result - - -def strip_accents(input_name: str) -> str: - return "".join(c for c in ud.normalize("NFD", input_name) if ud.category(c) != "Mn") diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py deleted file mode 100644 index ccfd5eaf07c12..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py +++ /dev/null @@ -1,3276 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - - -from typing import Set - -from normalization import DestinationType - -# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords -BIGQUERY = { - "ALL", - "AND", - "ANY", - "ARRAY", - "AS", - "ASC", - "ASSERT_ROWS_MODIFIED", - "AT", - "BETWEEN", - "BY", - "CASE", - "CAST", - "COLLATE", - "CONTAINS", - "CREATE", - "CROSS", - "CUBE", - "CURRENT", - "CURRENT_DATE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "DEFAULT", - "DEFINE", - "DESC", - "DISTINCT", - "ELSE", - "END", - "ENUM", - "ESCAPE", - "EXCEPT", - "EXCLUDE", - "EXISTS", - "EXTRACT", - "FALSE", - "FETCH", - "FOLLOWING", - "FOR", - "FROM", - "FULL", - "GROUP", - "GROUPING", - "GROUPS", - "HASH", - "HAVING", - "IF", - "IGNORE", - "IN", - "INNER", - "INTERSECT", - "INTERVAL", - "INTO", - "IS", - "JOIN", - "LATERAL", - "LEFT", - "LIKE", - "LIMIT", - "LOOKUP", - "MERGE", - "NATURAL", - "NEW", - "NO", - "NOT", - "NULL", - "NULLS", - "OF", - "ON", - "OR", - "ORDER", - "OUTER", - "OVER", - "PARTITION", - "PRECEDING", - "PROTO", - "RANGE", - "RECURSIVE", - "RESPECT", - "RIGHT", - "ROLLUP", - "ROWS", - "SELECT", - "SET", - "SOME", - "STRUCT", - "TABLESAMPLE", - "THEN", - "TO", - "TREAT", - "TRUE", - "UNBOUNDED", - "UNION", - "UNNEST", - "USING", - "WHEN", - "WHERE", - "WINDOW", - "WITH", - "WITHIN", -} - -# https://docs.aws.amazon.com/redshift/latest/dg/r_pg_keywords.html -# Some additional keywords not supported by redshift are missing from their docs... -REDSHIFT = { - "AES128", - "AES256", - "ALL", - "ALLOWOVERWRITE", - "ANALYSE", - "ANALYZE", - "AND", - "ANY", - "ARRAY", - "AS", - "ASC", - "AUTHORIZATION", - "AZ64", - "BACKUP", - "BETWEEN", - "BINARY", - "BLANKSASNULL", - "BOTH", - "BOOLEAN", - "BYTEDICT", - "BZIP2", - "CASE", - "CAST", - "CHECK", - "COLLATE", - "COLUMN", - "CONSTRAINT", - "CREATE", - "CREDENTIALS", - "CROSS", - "CURRENT_DATE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "CURRENT_USER", - "CURRENT_USER_ID", - "DATETIME", - "DEFAULT", - "DEFERRABLE", - "DEFLATE", - "DEFRAG", - "DELTA", - "DELTA32K", - "DESC", - "DISABLE", - "DISTINCT", - "DO", - "ELSE", - "EMPTYASNULL", - "ENABLE", - "ENCODE", - "ENCRYPT", - "ENCRYPTION", - "END", - "EXCEPT", - "EXPLICIT", - "FALSE", - "FOR", - "FOREIGN", - "FREEZE", - "FROM", - "FULL", - "GLOBALDICT256", - "GLOBALDICT64K", - "GRANT", - "GROUP", - "GZIP", - "HAVING", - "IDENTITY", - "IGNORE", - "ILIKE", - "IN", - "INITIALLY", - "INNER", - "INTERSECT", - "INTERVAL", - "INTO", - "IS", - "ISNULL", - "JOIN", - "LANGUAGE", - "LEADING", - "LEFT", - "LIKE", - "LIMIT", - "LOCALTIME", - "LOCALTIMESTAMP", - "LUN", - "LUNS", - "LZO", - "LZOP", - "MINUS", - "MOSTLY16", - "MOSTLY32", - "MOSTLY8", - "NATURAL", - "NEW", - "NOT", - "NOTNULL", - "NULL", - "NULLS", - "OFF", - "OFFLINE", - "OFFSET", - "OID", - "OLD", - "ON", - "ONLY", - "OPEN", - "OR", - "ORDER", - "OUTER", - "OVERLAPS", - "PARALLEL", - "PARTITION", - "PERCENT", - "PERMISSIONS", - "PLACING", - "PRIMARY", - "RAW", - "READRATIO", - "RECOVER", - "REFERENCES", - "RESPECT", - "REJECTLOG", - "RESORT", - "RESTORE", - "RIGHT", - "SELECT", - "SESSION_USER", - "SIMILAR", - "SNAPSHOT", - "SOME", - "SYSDATE", - "SYSTEM", - "TABLE", - "TAG", - "TDES", - "TEXT255", - "TEXT32K", - "THEN", - "TIME", - "TIMESTAMP", - "TO", - "TOP", - "TRAILING", - "TRUE", - "TRUNCATECOLUMNS", - "UNION", - "UNIQUE", - "USER", - "USING", - "VERBOSE", - "WALLET", - "WHEN", - "WHERE", - "WITH", - "WITHOUT", -} - -# https://www.postgresql.org/docs/current/sql-keywords-appendix.html -POSTGRES = { - "A", - "ABORT", - "ABS", - "ABSENT", - 
"ABSOLUTE", - "ACCESS", - "ACCORDING", - "ACOS", - "ACTION", - "ADA", - "ADD", - "ADMIN", - "AFTER", - "AGGREGATE", - "ALL", - "ALLOCATE", - "ALSO", - "ALTER", - "ALWAYS", - "ANALYSE", - "ANALYZE", - "AND", - "ANY", - "ARE", - "ARRAY", - "ARRAY_AGG", - "ARRAY_MAX_CARDINALITY", - "AS", - "ASC", - "ASENSITIVE", - "ASIN", - "ASSERTION", - "ASSIGNMENT", - "ASYMMETRIC", - "AT", - "ATAN", - "ATOMIC", - "ATTACH", - "ATTRIBUTE", - "ATTRIBUTES", - "AUTHORIZATION", - "AVG", - "BACKWARD", - "BASE64", - "BEFORE", - "BEGIN", - "BEGIN_FRAME", - "BEGIN_PARTITION", - "BERNOULLI", - "BETWEEN", - "BIGINT", - "BINARY", - "BIT", - "BIT_LENGTH", - "BLOB", - "BLOCKED", - "BOM", - "BOOLEAN", - "BOTH", - "BREADTH", - "BY", - "C", - "CACHE", - "CALL", - "CALLED", - "CARDINALITY", - "CASCADE", - "CASCADED", - "CASE", - "CAST", - "CATALOG", - "CATALOG_NAME", - "CEIL", - "CEILING", - "CHAIN", - "CHAINING", - "CHAR", - "CHARACTER", - "CHARACTERISTICS", - "CHARACTERS", - "CHARACTER_LENGTH", - "CHARACTER_SET_CATALOG", - "CHARACTER_SET_NAME", - "CHARACTER_SET_SCHEMA", - "CHAR_LENGTH", - "CHECK", - "CHECKPOINT", - "CLASS", - "CLASSIFIER", - "CLASS_ORIGIN", - "CLOB", - "CLOSE", - "CLUSTER", - "COALESCE", - "COBOL", - "COLLATE", - "COLLATION", - "COLLATION_CATALOG", - "COLLATION_NAME", - "COLLATION_SCHEMA", - "COLLECT", - "COLUMN", - "COLUMNS", - "COLUMN_NAME", - "COMMAND_FUNCTION", - "COMMAND_FUNCTION_CODE", - "COMMENT", - "COMMENTS", - "COMMIT", - "COMMITTED", - "CONCURRENTLY", - "CONDITION", - "CONDITIONAL", - "CONDITION_NUMBER", - "CONFIGURATION", - "CONFLICT", - "CONNECT", - "CONNECTION", - "CONNECTION_NAME", - "CONSTRAINT", - "CONSTRAINTS", - "CONSTRAINT_CATALOG", - "CONSTRAINT_NAME", - "CONSTRAINT_SCHEMA", - "CONSTRUCTOR", - "CONTAINS", - "CONTENT", - "CONTINUE", - "CONTROL", - "CONVERSION", - "CONVERT", - "COPY", - "CORR", - "CORRESPONDING", - "COS", - "COSH", - "COST", - "COUNT", - "COVAR_POP", - "COVAR_SAMP", - "CREATE", - "CROSS", - "CSV", - "CUBE", - "CUME_DIST", - "CURRENT", - "CURRENT_CATALOG", - "CURRENT_DATE", - "CURRENT_DEFAULT_TRANSFORM_GROUP", - "CURRENT_PATH", - "CURRENT_ROLE", - "CURRENT_ROW", - "CURRENT_SCHEMA", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "CURRENT_TRANSFORM_GROUP_FOR_TYPE", - "CURRENT_USER", - "CURSOR", - "CURSOR_NAME", - "CYCLE", - "DATA", - "DATABASE", - "DATALINK", - "DATE", - "DATETIME_INTERVAL_CODE", - "DATETIME_INTERVAL_PRECISION", - "DAY", - "DB", - "DEALLOCATE", - "DEC", - "DECFLOAT", - "DECIMAL", - "DECLARE", - "DEFAULT", - "DEFAULTS", - "DEFERRABLE", - "DEFERRED", - "DEFINE", - "DEFINED", - "DEFINER", - "DEGREE", - "DELETE", - "DELIMITER", - "DELIMITERS", - "DENSE_RANK", - "DEPENDS", - "DEPTH", - "DEREF", - "DERIVED", - "DESC", - "DESCRIBE", - "DESCRIPTOR", - "DETACH", - "DETERMINISTIC", - "DIAGNOSTICS", - "DICTIONARY", - "DISABLE", - "DISCARD", - "DISCONNECT", - "DISPATCH", - "DISTINCT", - "DLNEWCOPY", - "DLPREVIOUSCOPY", - "DLURLCOMPLETE", - "DLURLCOMPLETEONLY", - "DLURLCOMPLETEWRITE", - "DLURLPATH", - "DLURLPATHONLY", - "DLURLPATHWRITE", - "DLURLSCHEME", - "DLURLSERVER", - "DLVALUE", - "DO", - "DOCUMENT", - "DOMAIN", - "DOUBLE", - "DROP", - "DYNAMIC", - "DYNAMIC_FUNCTION", - "DYNAMIC_FUNCTION_CODE", - "EACH", - "ELEMENT", - "ELSE", - "EMPTY", - "ENABLE", - "ENCODING", - "ENCRYPTED", - "END", - "END-EXEC", - "END_FRAME", - "END_PARTITION", - "ENFORCED", - "ENUM", - "EQUALS", - "ERROR", - "ESCAPE", - "EVENT", - "EVERY", - "EXCEPT", - "EXCEPTION", - "EXCLUDE", - "EXCLUDING", - "EXCLUSIVE", - "EXEC", - "EXECUTE", - "EXISTS", - "EXP", - "EXPLAIN", - "EXPRESSION", - "EXTENSION", 
- "EXTERNAL", - "EXTRACT", - "FALSE", - "FAMILY", - "FETCH", - "FILE", - "FILTER", - "FINAL", - "FINISH", - "FIRST", - "FIRST_VALUE", - "FLAG", - "FLOAT", - "FLOOR", - "FOLLOWING", - "FOR", - "FORCE", - "FOREIGN", - "FORMAT", - "FORTRAN", - "FORWARD", - "FOUND", - "FRAME_ROW", - "FREE", - "FREEZE", - "FROM", - "FS", - "FULFILL", - "FULL", - "FUNCTION", - "FUNCTIONS", - "FUSION", - "G", - "GENERAL", - "GENERATED", - "GET", - "GLOBAL", - "GO", - "GOTO", - "GRANT", - "GRANTED", - "GREATEST", - "GROUP", - "GROUPING", - "HANDLER", - "HAVING", - "HEADER", - "HEX", - "HIERARCHY", - "HOLD", - "HOUR", - "ID", - "IDENTITY", - "IF", - "IGNORE", - "ILIKE", - "IMMEDIATE", - "IMMEDIATELY", - "IMMUTABLE", - "IMPLEMENTATION", - "IMPLICIT", - "IMPORT", - "IN", - "INCLUDE", - "INCLUDING", - "INCREMENT", - "INDENT", - "INDEX", - "INDEXES", - "INDICATOR", - "INHERIT", - "INHERITS", - "INITIAL", - "INITIALLY", - "INLINE", - "INNER", - "INOUT", - "INPUT", - "INSENSITIVE", - "INSERT", - "INSTANCE", - "INSTANTIABLE", - "INSTEAD", - "INT", - "INTEGER", - "INTEGRITY", - "INTERSECT", - "INTERSECTION", - "INTERVAL", - "INTO", - "INVOKER", - "IS", - "ISNULL", - "ISOLATION", - "JOIN", - "JSON", - "JSON_ARRAY", - "JSON_ARRAYAGG", - "JSON_EXISTS", - "JSON_OBJECT", - "JSON_OBJECTAGG", - "JSON_QUERY", - "JSON_TABLE", - "JSON_TABLE_PRIMITIVE", - "JSON_VALUE", - "K", - "KEEP", - "KEY", - "KEYS", - "KEY_MEMBER", - "KEY_TYPE", - "LABEL", - "LAG", - "LANGUAGE", - "LARGE", - "LAST", - "LAST_VALUE", - "LATERAL", - "LEAD", - "LEADING", - "LEAKPROOF", - "LEAST", - "LEFT", - "LENGTH", - "LEVEL", - "LIBRARY", - "LIKE", - "LIKE_REGEX", - "LIMIT", - "LINK", - "LISTAGG", - "LISTEN", - "LN", - "LOAD", - "LOCAL", - "LOCALTIME", - "LOCALTIMESTAMP", - "LOCATION", - "LOCATOR", - "LOCK", - "LOCKED", - "LOG", - "LOG10", - "LOGGED", - "LOWER", - "M", - "MAP", - "MAPPING", - "MATCH", - "MATCHED", - "MATCHES", - "MATCH_NUMBER", - "MATCH_RECOGNIZE", - "MATERIALIZED", - "MAX", - "MAXVALUE", - "MEASURES", - "MEMBER", - "MERGE", - "MESSAGE_LENGTH", - "MESSAGE_OCTET_LENGTH", - "MESSAGE_TEXT", - "METHOD", - "MIN", - "MINUTE", - "MINVALUE", - "MOD", - "MODE", - "MODIFIES", - "MODULE", - "MONTH", - "MORE", - "MOVE", - "MULTISET", - "MUMPS", - "NAME", - "NAMES", - "NAMESPACE", - "NATIONAL", - "NATURAL", - "NCHAR", - "NCLOB", - "NESTED", - "NESTING", - "NEW", - "NEXT", - "NFC", - "NFD", - "NFKC", - "NFKD", - "NIL", - "NO", - "NONE", - "NORMALIZE", - "NORMALIZED", - "NOT", - "NOTHING", - "NOTIFY", - "NOTNULL", - "NOWAIT", - "NTH_VALUE", - "NTILE", - "NULL", - "NULLABLE", - "NULLIF", - "NULLS", - "NUMBER", - "NUMERIC", - "OBJECT", - "OCCURRENCES_REGEX", - "OCTETS", - "OCTET_LENGTH", - "OF", - "OFF", - "OFFSET", - "OIDS", - "OLD", - "OMIT", - "ON", - "ONE", - "ONLY", - "OPEN", - "OPERATOR", - "OPTION", - "OPTIONS", - "OR", - "ORDER", - "ORDERING", - "ORDINALITY", - "OTHERS", - "OUT", - "OUTER", - "OUTPUT", - "OVER", - "OVERFLOW", - "OVERLAPS", - "OVERLAY", - "OVERRIDING", - "OWNED", - "OWNER", - "P", - "PAD", - "PARALLEL", - "PARAMETER", - "PARAMETER_MODE", - "PARAMETER_NAME", - "PARAMETER_ORDINAL_POSITION", - "PARAMETER_SPECIFIC_CATALOG", - "PARAMETER_SPECIFIC_NAME", - "PARAMETER_SPECIFIC_SCHEMA", - "PARSER", - "PARTIAL", - "PARTITION", - "PASCAL", - "PASS", - "PASSING", - "PASSTHROUGH", - "PASSWORD", - "PAST", - "PATH", - "PATTERN", - "PER", - "PERCENT", - "PERCENTILE_CONT", - "PERCENTILE_DISC", - "PERCENT_RANK", - "PERIOD", - "PERMISSION", - "PERMUTE", - "PLACING", - "PLAN", - "PLANS", - "PLI", - "POLICY", - "PORTION", - "POSITION", - "POSITION_REGEX", - 
"POWER", - "PRECEDES", - "PRECEDING", - "PRECISION", - "PREPARE", - "PREPARED", - "PRESERVE", - "PRIMARY", - "PRIOR", - "PRIVATE", - "PRIVILEGES", - "PROCEDURAL", - "PROCEDURE", - "PROCEDURES", - "PROGRAM", - "PRUNE", - "PTF", - "PUBLICATION", - "QUOTE", - "QUOTES", - "RANGE", - "RANK", - "READ", - "READS", - "REAL", - "REASSIGN", - "RECHECK", - "RECOVERY", - "RECURSIVE", - "REF", - "REFERENCES", - "REFERENCING", - "REFRESH", - "REGR_AVGX", - "REGR_AVGY", - "REGR_COUNT", - "REGR_INTERCEPT", - "REGR_R2", - "REGR_SLOPE", - "REGR_SXX", - "REGR_SXY", - "REGR_SYY", - "REINDEX", - "RELATIVE", - "RELEASE", - "RENAME", - "REPEATABLE", - "REPLACE", - "REPLICA", - "REQUIRING", - "RESET", - "RESPECT", - "RESTART", - "RESTORE", - "RESTRICT", - "RESULT", - "RETURN", - "RETURNED_CARDINALITY", - "RETURNED_LENGTH", - "RETURNED_OCTET_LENGTH", - "RETURNED_SQLSTATE", - "RETURNING", - "RETURNS", - "REVOKE", - "RIGHT", - "ROLE", - "ROLLBACK", - "ROLLUP", - "ROUTINE", - "ROUTINES", - "ROUTINE_CATALOG", - "ROUTINE_NAME", - "ROUTINE_SCHEMA", - "ROW", - "ROWS", - "ROW_COUNT", - "ROW_NUMBER", - "RULE", - "RUNNING", - "SAVEPOINT", - "SCALAR", - "SCALE", - "SCHEMA", - "SCHEMAS", - "SCHEMA_NAME", - "SCOPE", - "SCOPE_CATALOG", - "SCOPE_NAME", - "SCOPE_SCHEMA", - "SCROLL", - "SEARCH", - "SECOND", - "SECTION", - "SECURITY", - "SEEK", - "SELECT", - "SELECTIVE", - "SELF", - "SENSITIVE", - "SEQUENCE", - "SEQUENCES", - "SERIALIZABLE", - "SERVER", - "SERVER_NAME", - "SESSION", - "SESSION_USER", - "SET", - "SETOF", - "SETS", - "SHARE", - "SHOW", - "SIMILAR", - "SIMPLE", - "SIN", - "SINH", - "SIZE", - "SKIP", - "SMALLINT", - "SNAPSHOT", - "SOME", - "SOURCE", - "SPACE", - "SPECIFIC", - "SPECIFICTYPE", - "SPECIFIC_NAME", - "SQL", - "SQLCODE", - "SQLERROR", - "SQLEXCEPTION", - "SQLSTATE", - "SQLWARNING", - "SQRT", - "STABLE", - "STANDALONE", - "START", - "STATE", - "STATEMENT", - "STATIC", - "STATISTICS", - "STDDEV_POP", - "STDDEV_SAMP", - "STDIN", - "STDOUT", - "STORAGE", - "STORED", - "STRICT", - "STRING", - "STRIP", - "STRUCTURE", - "STYLE", - "SUBCLASS_ORIGIN", - "SUBMULTISET", - "SUBSCRIPTION", - "SUBSET", - "SUBSTRING", - "SUBSTRING_REGEX", - "SUCCEEDS", - "SUM", - "SUPPORT", - "SYMMETRIC", - "SYSID", - "SYSTEM", - "SYSTEM_TIME", - "SYSTEM_USER", - "T", - "TABLE", - "TABLES", - "TABLESAMPLE", - "TABLESPACE", - "TABLE_NAME", - "TAN", - "TANH", - "TEMP", - "TEMPLATE", - "TEMPORARY", - "TEXT", - "THEN", - "THROUGH", - "TIES", - "TIME", - "TIMESTAMP", - "TIMEZONE_HOUR", - "TIMEZONE_MINUTE", - "TO", - "TOKEN", - "TOP_LEVEL_COUNT", - "TRAILING", - "TRANSACTION", - "TRANSACTIONS_COMMITTED", - "TRANSACTIONS_ROLLED_BACK", - "TRANSACTION_ACTIVE", - "TRANSFORM", - "TRANSFORMS", - "TRANSLATE", - "TRANSLATE_REGEX", - "TRANSLATION", - "TREAT", - "TRIGGER", - "TRIGGER_CATALOG", - "TRIGGER_NAME", - "TRIGGER_SCHEMA", - "TRIM", - "TRIM_ARRAY", - "TRUE", - "TRUNCATE", - "TRUSTED", - "TYPE", - "TYPES", - "UESCAPE", - "UNBOUNDED", - "UNCOMMITTED", - "UNCONDITIONAL", - "UNDER", - "UNENCRYPTED", - "UNION", - "UNIQUE", - "UNKNOWN", - "UNLINK", - "UNLISTEN", - "UNLOGGED", - "UNMATCHED", - "UNNAMED", - "UNNEST", - "UNTIL", - "UNTYPED", - "UPDATE", - "UPPER", - "URI", - "USAGE", - "USER", - "USER_DEFINED_TYPE_CATALOG", - "USER_DEFINED_TYPE_CODE", - "USER_DEFINED_TYPE_NAME", - "USER_DEFINED_TYPE_SCHEMA", - "USING", - "UTF16", - "UTF32", - "UTF8", - "VACUUM", - "VALID", - "VALIDATE", - "VALIDATOR", - "VALUE", - "VALUES", - "VALUE_OF", - "VARBINARY", - "VARCHAR", - "VARIADIC", - "VARYING", - "VAR_POP", - "VAR_SAMP", - "VERBOSE", - "VERSION", - 
"VERSIONING", - "VIEW", - "VIEWS", - "VOLATILE", - "WHEN", - "WHENEVER", - "WHERE", - "WHITESPACE", - "WIDTH_BUCKET", - "WINDOW", - "WITH", - "WITHIN", - "WITHOUT", - "WORK", - "WRAPPER", - "WRITE", - "XML", - "XMLAGG", - "XMLATTRIBUTES", - "XMLBINARY", - "XMLCAST", - "XMLCOMMENT", - "XMLCONCAT", - "XMLDECLARATION", - "XMLDOCUMENT", - "XMLELEMENT", - "XMLEXISTS", - "XMLFOREST", - "XMLITERATE", - "XMLNAMESPACES", - "XMLPARSE", - "XMLPI", - "XMLQUERY", - "XMLROOT", - "XMLSCHEMA", - "XMLSERIALIZE", - "XMLTABLE", - "XMLTEXT", - "XMLVALIDATE", - "YEAR", - "YES", - "ZONE", -} - -# https://docs.snowflake.com/en/sql-reference/reserved-keywords.html -SNOWFLAKE = { - "ALL", - "ALTER", - "AND", - "ANY", - "AS", - "BETWEEN", - "BY", - "CASE", - "CAST", - "CHECK", - "COLUMN", - "CONNECT", - "CONNECTION", - "CONSTRAINT", - "CREATE", - "CROSS", - "CURRENT", - "CURRENT_DATE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "CURRENT_USER", - "DATABASE", - "DEFAULT", - "DELETE", - "DISTINCT", - "DROP", - "ELSE", - "EXISTS", - "FALSE", - "FOLLOWING", - "FOR", - "FROM", - "FULL", - "GRANT", - "GROUP", - "GSCLUSTER", - "HAVING", - "ILIKE", - "IN", - "INCREMENT", - "INNER", - "INSERT", - "INTERSECT", - "INTO", - "IS", - "ISSUE", - "JOIN", - "LATERAL", - "LEFT", - "LIKE", - "LOCALTIME", - "LOCALTIMESTAMP", - "MINUS", - "NATURAL", - "NOT", - "NULL", - "OF", - "ON", - "OR", - "ORDER", - "ORGANIZATION", - "QUALIFY", - "REGEXP", - "REVOKE", - "RIGHT", - "RLIKE", - "ROW", - "ROWS", - "SAMPLE", - "SCHEMA", - "SELECT", - "SET", - "SOME", - "START", - "TABLE", - "TABLESAMPLE", - "THEN", - "TO", - "TRIGGER", - "TRUE", - "TRY_CAST", - "UNION", - "UNIQUE", - "UPDATE", - "USING", - "VALUES", - "VIEW", - "WHEN", - "WHENEVER", - "WHERE", - "WITH", -} - -# https://dev.mysql.com/doc/refman/8.0/en/keywords.html -MYSQL = { - "ACCESSIBLE", - "ACCOUNT", - "ACTION", - "ACTIVE", - "ADD", - "ADMIN", - "AFTER", - "AGAINST", - "AGGREGATE", - "ALGORITHM", - "ALL", - "ALTER", - "ALWAYS", - "ANALYSE", - "ANALYZE", - "AND", - "ANY", - "ARRAY", - "AS", - "ASC", - "ASCII", - "ASENSITIVE", - "AT", - "ATTRIBUTE", - "AUTOEXTEND_SIZE", - "AUTO_INCREMENT", - "AVG", - "AVG_ROW_LENGTH", - "BACKUP", - "BEFORE", - "BEGIN", - "BETWEEN", - "BIGINT", - "BINARY", - "BINLOG", - "BIT", - "BLOB", - "BLOCK", - "BOOL", - "BOOLEAN", - "BOTH", - "BTREE", - "BUCKETS", - "BY", - "BYTE", - "CACHE", - "CALL", - "CASCADE", - "CASCADED", - "CASE", - "CATALOG_NAME", - "CHAIN", - "CHANGE", - "CHANGED", - "CHANNEL", - "CHAR", - "CHARACTER", - "CHARSET", - "CHECK", - "CHECKSUM", - "CIPHER", - "CLASS_ORIGIN", - "CLIENT", - "CLONE", - "CLOSE", - "COALESCE", - "CODE", - "COLLATE", - "COLLATION", - "COLUMN", - "COLUMNS", - "COLUMN_FORMAT", - "COLUMN_NAME", - "COMMENT", - "COMMIT", - "COMMITTED", - "COMPACT", - "COMPLETION", - "COMPONENT", - "COMPRESSED", - "COMPRESSION", - "CONCURRENT", - "CONDITION", - "CONNECTION", - "CONSISTENT", - "CONSTRAINT", - "CONSTRAINT_CATALOG", - "CONSTRAINT_NAME", - "CONSTRAINT_SCHEMA", - "CONTAINS", - "CONTEXT", - "CONTINUE", - "CONVERT", - "CPU", - "CREATE", - "CROSS", - "CUBE", - "CUME_DIST", - "CURRENT", - "CURRENT_DATE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "CURRENT_USER", - "CURSOR", - "CURSOR_NAME", - "DATA", - "DATABASE", - "DATABASES", - "DATAFILE", - "DATE", - "DATETIME", - "DAY", - "DAY_HOUR", - "DAY_MICROSECOND", - "DAY_MINUTE", - "DAY_SECOND", - "DEALLOCATE", - "DEC", - "DECIMAL", - "DECLARE", - "DEFAULT", - "DEFAULT_AUTH", - "DEFINER", - "DEFINITION", - "DELAYED", - "DELAY_KEY_WRITE", - "DELETE", - "DENSE_RANK", - "DESC", - 
"DESCRIBE", - "DESCRIPTION", - "DES_KEY_FILE", - "DETERMINISTIC", - "DIAGNOSTICS", - "DIRECTORY", - "DISABLE", - "DISCARD", - "DISK", - "DISTINCT", - "DISTINCTROW", - "DIV", - "DO", - "DOUBLE", - "DROP", - "DUAL", - "DUMPFILE", - "DUPLICATE", - "DYNAMIC", - "EACH", - "ELSE", - "ELSEIF", - "EMPTY", - "ENABLE", - "ENCLOSED", - "ENCRYPTION", - "END", - "ENDS", - "ENFORCED", - "ENGINE", - "ENGINES", - "ENGINE_ATTRIBUTE", - "ENUM", - "ERROR", - "ERRORS", - "ESCAPE", - "ESCAPED", - "EVENT", - "EVENTS", - "EVERY", - "EXCEPT", - "EXCHANGE", - "EXCLUDE", - "EXECUTE", - "EXISTS", - "EXIT", - "EXPANSION", - "EXPIRE", - "EXPLAIN", - "EXPORT", - "EXTENDED", - "EXTENT_SIZE", - "FAILED_LOGIN_ATTEMPTS", - "FALSE", - "FAST", - "FAULTS", - "FETCH", - "FIELDS", - "FILE", - "FILE_BLOCK_SIZE", - "FILTER", - "FIRST", - "FIRST_VALUE", - "FIXED", - "FLOAT", - "FLOAT4", - "FLOAT8", - "FLUSH", - "FOLLOWING", - "FOLLOWS", - "FOR", - "FORCE", - "FOREIGN", - "FORMAT", - "FOUND", - "FROM", - "FULL", - "FULLTEXT", - "FUNCTION", - "GENERAL", - "GENERATED", - "GEOMCOLLECTION", - "GEOMETRY", - "GEOMETRYCOLLECTION", - "GET", - "GET_FORMAT", - "GET_MASTER_PUBLIC_KEY", - "GET_SOURCE_PUBLIC_KEY", - "GLOBAL", - "GRANT", - "GRANTS", - "GROUP", - "GROUPING", - "GROUPS", - "GROUP_REPLICATION", - "HANDLER", - "HASH", - "HAVING", - "HELP", - "HIGH_PRIORITY", - "HISTOGRAM", - "HISTORY", - "HOST", - "HOSTS", - "HOUR", - "HOUR_MICROSECOND", - "HOUR_MINUTE", - "HOUR_SECOND", - "IDENTIFIED", - "IF", - "IGNORE", - "IGNORE_SERVER_IDS", - "IMPORT", - "IN", - "INACTIVE", - "INDEX", - "INDEXES", - "INFILE", - "INITIAL_SIZE", - "INNER", - "INOUT", - "INSENSITIVE", - "INSERT", - "INSERT_METHOD", - "INSTALL", - "INSTANCE", - "INT", - "INT1", - "INT2", - "INT3", - "INT4", - "INT8", - "INTEGER", - "INTERVAL", - "INTO", - "INVISIBLE", - "INVOKER", - "IO", - "IO_AFTER_GTIDS", - "IO_BEFORE_GTIDS", - "IO_THREAD", - "IPC", - "IS", - "ISOLATION", - "ISSUER", - "ITERATE", - "JOIN", - "JSON", - "JSON_TABLE", - "JSON_VALUE", - "KEY", - "KEYRING", - "KEYS", - "KEY_BLOCK_SIZE", - "KILL", - "LAG", - "LANGUAGE", - "LAST", - "LAST_VALUE", - "LATERAL", - "LEAD", - "LEADING", - "LEAVE", - "LEAVES", - "LEFT", - "LESS", - "LEVEL", - "LIKE", - "LIMIT", - "LINEAR", - "LINES", - "LINESTRING", - "LIST", - "LOAD", - "LOCAL", - "LOCALTIME", - "LOCALTIMESTAMP", - "LOCK", - "LOCKED", - "LOCKS", - "LOGFILE", - "LOGS", - "LONG", - "LONGBLOB", - "LONGTEXT", - "LOOP", - "LOW_PRIORITY", - "MASTER", - "MASTER_AUTO_POSITION", - "MASTER_BIND", - "MASTER_COMPRESSION_ALGORITHMS", - "MASTER_CONNECT_RETRY", - "MASTER_DELAY", - "MASTER_HEARTBEAT_PERIOD", - "MASTER_HOST", - "MASTER_LOG_FILE", - "MASTER_LOG_POS", - "MASTER_PASSWORD", - "MASTER_PORT", - "MASTER_PUBLIC_KEY_PATH", - "MASTER_RETRY_COUNT", - "MASTER_SERVER_ID", - "MASTER_SSL", - "MASTER_SSL_CA", - "MASTER_SSL_CAPATH", - "MASTER_SSL_CERT", - "MASTER_SSL_CIPHER", - "MASTER_SSL_CRL", - "MASTER_SSL_CRLPATH", - "MASTER_SSL_KEY", - "MASTER_SSL_VERIFY_SERVER_CERT", - "MASTER_TLS_CIPHERSUITES", - "MASTER_TLS_VERSION", - "MASTER_USER", - "MASTER_ZSTD_COMPRESSION_LEVEL", - "MATCH", - "MAXVALUE", - "MAX_CONNECTIONS_PER_HOUR", - "MAX_QUERIES_PER_HOUR", - "MAX_ROWS", - "MAX_SIZE", - "MAX_UPDATES_PER_HOUR", - "MAX_USER_CONNECTIONS", - "MEDIUM", - "MEDIUMBLOB", - "MEDIUMINT", - "MEDIUMTEXT", - "MEMBER", - "MEMORY", - "MERGE", - "MESSAGE_TEXT", - "MICROSECOND", - "MIDDLEINT", - "MIGRATE", - "MINUTE", - "MINUTE_MICROSECOND", - "MINUTE_SECOND", - "MIN_ROWS", - "MOD", - "MODE", - "MODIFIES", - "MODIFY", - "MONTH", - "MULTILINESTRING", - 
"MULTIPOINT", - "MULTIPOLYGON", - "MUTEX", - "MYSQL_ERRNO", - "NAME", - "NAMES", - "NATIONAL", - "NATURAL", - "NCHAR", - "NDB", - "NDBCLUSTER", - "NESTED", - "NETWORK_NAMESPACE", - "NEVER", - "NEW", - "NEXT", - "NO", - "NODEGROUP", - "NONE", - "NOT", - "NOWAIT", - "NO_WAIT", - "NO_WRITE_TO_BINLOG", - "NTH_VALUE", - "NTILE", - "NULL", - "NULLS", - "NUMBER", - "NUMERIC", - "NVARCHAR", - "OF", - "OFF", - "OFFSET", - "OJ", - "OLD", - "ON", - "ONE", - "ONLY", - "OPEN", - "OPTIMIZE", - "OPTIMIZER_COSTS", - "OPTION", - "OPTIONAL", - "OPTIONALLY", - "OPTIONS", - "OR", - "ORDER", - "ORDINALITY", - "ORGANIZATION", - "OTHERS", - "OUT", - "OUTER", - "OUTFILE", - "OVER", - "OWNER", - "PACK_KEYS", - "PAGE", - "PARSER", - "PARSE_GCOL_EXPR", - "PARTIAL", - "PARTITION", - "PARTITIONING", - "PARTITIONS", - "PASSWORD", - "PASSWORD_LOCK_TIME", - "PATH", - "PERCENT_RANK", - "PERSIST", - "PERSIST_ONLY", - "PHASE", - "PLUGIN", - "PLUGINS", - "PLUGIN_DIR", - "POINT", - "POLYGON", - "PORT", - "PRECEDES", - "PRECEDING", - "PRECISION", - "PREPARE", - "PRESERVE", - "PREV", - "PRIMARY", - "PRIVILEGES", - "PRIVILEGE_CHECKS_USER", - "PROCEDURE", - "PROCESS", - "PROCESSLIST", - "PROFILE", - "PROFILES", - "PROXY", - "PURGE", - "QUARTER", - "QUERY", - "QUICK", - "RANDOM", - "RANGE", - "RANK", - "READ", - "READS", - "READ_ONLY", - "READ_WRITE", - "REAL", - "REBUILD", - "RECOVER", - "RECURSIVE", - "REDOFILE", - "REDO_BUFFER_SIZE", - "REDUNDANT", - "REFERENCE", - "REFERENCES", - "REGEXP", - "RELAY", - "RELAYLOG", - "RELAY_LOG_FILE", - "RELAY_LOG_POS", - "RELAY_THREAD", - "RELEASE", - "RELOAD", - "REMOTE", - "REMOVE", - "RENAME", - "REORGANIZE", - "REPAIR", - "REPEAT", - "REPEATABLE", - "REPLACE", - "REPLICA", - "REPLICAS", - "REPLICATE_DO_DB", - "REPLICATE_DO_TABLE", - "REPLICATE_IGNORE_DB", - "REPLICATE_IGNORE_TABLE", - "REPLICATE_REWRITE_DB", - "REPLICATE_WILD_DO_TABLE", - "REPLICATE_WILD_IGNORE_TABLE", - "REPLICATION", - "REQUIRE", - "REQUIRE_ROW_FORMAT", - "RESET", - "RESIGNAL", - "RESOURCE", - "RESPECT", - "RESTART", - "RESTORE", - "RESTRICT", - "RESUME", - "RETAIN", - "RETURN", - "RETURNED_SQLSTATE", - "RETURNING", - "RETURNS", - "REUSE", - "REVERSE", - "REVOKE", - "RIGHT", - "RLIKE", - "ROLE", - "ROLLBACK", - "ROLLUP", - "ROTATE", - "ROUTINE", - "ROW", - "ROWS", - "ROW_COUNT", - "ROW_FORMAT", - "ROW_NUMBER", - "RTREE", - "SAVEPOINT", - "SCHEDULE", - "SCHEMA", - "SCHEMAS", - "SCHEMA_NAME", - "SECOND", - "SECONDARY", - "SECONDARY_ENGINE", - "SECONDARY_ENGINE_ATTRIBUTE", - "SECONDARY_LOAD", - "SECONDARY_UNLOAD", - "SECOND_MICROSECOND", - "SECURITY", - "SELECT", - "SENSITIVE", - "SEPARATOR", - "SERIAL", - "SERIALIZABLE", - "SERVER", - "SESSION", - "SET", - "SHARE", - "SHOW", - "SHUTDOWN", - "SIGNAL", - "SIGNED", - "SIMPLE", - "SKIP", - "SLAVE", - "SLOW", - "SMALLINT", - "SNAPSHOT", - "SOCKET", - "SOME", - "SONAME", - "SOUNDS", - "SOURCE", - "SOURCE_AUTO_POSITION", - "SOURCE_BIND", - "SOURCE_COMPRESSION_ALGORITHMS", - "SOURCE_CONNECT_RETRY", - "SOURCE_DELAY", - "SOURCE_HEARTBEAT_PERIOD", - "SOURCE_HOST", - "SOURCE_LOG_FILE", - "SOURCE_LOG_POS", - "SOURCE_PASSWORD", - "SOURCE_PORT", - "SOURCE_PUBLIC_KEY_PATH", - "SOURCE_RETRY_COUNT", - "SOURCE_SSL", - "SOURCE_SSL_CA", - "SOURCE_SSL_CAPATH", - "SOURCE_SSL_CERT", - "SOURCE_SSL_CIPHER", - "SOURCE_SSL_CRL", - "SOURCE_SSL_CRLPATH", - "SOURCE_SSL_KEY", - "SOURCE_SSL_VERIFY_SERVER_CERT", - "SOURCE_TLS_CIPHERSUITES", - "SOURCE_TLS_VERSION", - "SOURCE_USER", - "SOURCE_ZSTD_COMPRESSION_LEVEL", - "SPATIAL", - "SPECIFIC", - "SQL", - "SQLEXCEPTION", - "SQLSTATE", - "SQLWARNING", - 
"SQL_AFTER_GTIDS", - "SQL_AFTER_MTS_GAPS", - "SQL_BEFORE_GTIDS", - "SQL_BIG_RESULT", - "SQL_BUFFER_RESULT", - "SQL_CACHE", - "SQL_CALC_FOUND_ROWS", - "SQL_NO_CACHE", - "SQL_SMALL_RESULT", - "SQL_THREAD", - "SQL_TSI_DAY", - "SQL_TSI_HOUR", - "SQL_TSI_MINUTE", - "SQL_TSI_MONTH", - "SQL_TSI_QUARTER", - "SQL_TSI_SECOND", - "SQL_TSI_WEEK", - "SQL_TSI_YEAR", - "SRID", - "SSL", - "STACKED", - "START", - "STARTING", - "STARTS", - "STATS_AUTO_RECALC", - "STATS_PERSISTENT", - "STATS_SAMPLE_PAGES", - "STATUS", - "STOP", - "STORAGE", - "STORED", - "STRAIGHT_JOIN", - "STREAM", - "STRING", - "SUBCLASS_ORIGIN", - "SUBJECT", - "SUBPARTITION", - "SUBPARTITIONS", - "SUPER", - "SUSPEND", - "SWAPS", - "SWITCHES", - "SYSTEM", - "TABLE", - "TABLES", - "TABLESPACE", - "TABLE_CHECKSUM", - "TABLE_NAME", - "TEMPORARY", - "TEMPTABLE", - "TERMINATED", - "TEXT", - "THAN", - "THEN", - "THREAD_PRIORITY", - "TIES", - "TIME", - "TIMESTAMP", - "TIMESTAMPADD", - "TIMESTAMPDIFF", - "TINYBLOB", - "TINYINT", - "TINYTEXT", - "TLS", - "TO", - "TRAILING", - "TRANSACTION", - "TRIGGER", - "TRIGGERS", - "TRUE", - "TRUNCATE", - "TYPE", - "TYPES", - "UNBOUNDED", - "UNCOMMITTED", - "UNDEFINED", - "UNDO", - "UNDOFILE", - "UNDO_BUFFER_SIZE", - "UNICODE", - "UNINSTALL", - "UNION", - "UNIQUE", - "UNKNOWN", - "UNLOCK", - "UNSIGNED", - "UNTIL", - "UPDATE", - "UPGRADE", - "USAGE", - "USE", - "USER", - "USER_RESOURCES", - "USE_FRM", - "USING", - "UTC_DATE", - "UTC_TIME", - "UTC_TIMESTAMP", - "VALIDATION", - "VALUE", - "VALUES", - "VARBINARY", - "VARCHAR", - "VARCHARACTER", - "VARIABLES", - "VARYING", - "VCPU", - "VIEW", - "VIRTUAL", - "VISIBLE", - "WAIT", - "WARNINGS", - "WEEK", - "WEIGHT_STRING", - "WHEN", - "WHERE", - "WHILE", - "WINDOW", - "WITH", - "WITHOUT", - "WORK", - "WRAPPER", - "WRITE", - "X509", - "XA", - "XID", - "XML", - "XOR", - "YEAR", - "YEAR_MONTH", - "ZEROFILL", - "ZONE", -} - -# https://docs.oracle.com/cd/B19306_01/server.102/b14200/ap_keywd.htm -ORACLE = { - "ACCESS", - "ADD", - "ALL", - "ALTER", - "AND", - "ANY", - "AS", - "ASC", - "AUDIT", - "BETWEEN", - "BY", - "CHAR", - "CHECK", - "CLUSTER", - "COLUMN", - "COMMENT", - "COMPRESS", - "CONNECT", - "CREATE", - "CURRENT", - "DATE", - "DECIMAL", - "DEFAULT", - "DELETE", - "DESC", - "DISTINCT", - "DROP", - "ELSE", - "EXCLUSIVE", - "EXISTS", - "FILE", - "FLOAT", - "FOR", - "FROM", - "GRANT", - "GROUP", - "HAVING", - "IDENTIFIED", - "IMMEDIATE", - "IN", - "INCREMENT", - "INDEX", - "INITIAL", - "INSERT", - "INTEGER", - "INTERSECT", - "INTO", - "IS", - "LEVEL", - "LIKE", - "LOCK", - "LONG", - "MAXEXTENTS", - "MINUS", - "MLSLABEL", - "MODE", - "MODIFY", - "NOAUDIT", - "NOCOMPRESS", - "NOT", - "NOWAIT", - "NULL", - "NUMBER", - "OF", - "OFFLINE", - "ON", - "ONLINE", - "OPTION", - "OR", - "ORDER", - "PCTFREE", - "PRIOR", - "PRIVILEGES", - "PUBLIC", - "RAW", - "RENAME", - "RESOURCE", - "REVOKE", - "ROW", - "ROWID", - "ROWNUM", - "ROWS", - "SELECT", - "SESSION", - "SET", - "SHARE", - "SIZE", - "SMALLINT", - "START", - "SUCCESSFUL", - "SYNONYM", - "SYSDATE", - "TABLE", - "THEN", - "TO", - "TRIGGER", - "UID", - "UNION", - "UNIQUE", - "UPDATE", - "USER", - "VALIDATE", - "VALUES", - "VARCHAR", - "VARCHAR2", - "VIEW", - "WHENEVER", - "WHERE", - "WITH", -} - - -# https://docs.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql?view=sql-server-ver15 -MSSQL = { - "ADD", - "EXTERNAL", - "PROCEDURE", - "ALL", - "FETCH", - "PUBLIC", - "ALTER", - "FILE", - "RAISERROR", - "AND", - "FILLFACTOR", - "READ", - "ANY", - "FOR", - "READTEXT", - "AS", - "FOREIGN", - "RECONFIGURE", 
- "ASC", - "FREETEXT", - "REFERENCES", - "AUTHORIZATION", - "FREETEXTTABLE", - "REPLICATION", - "BACKUP", - "FROM", - "RESTORE", - "BEGIN", - "FULL", - "RESTRICT", - "BETWEEN", - "FUNCTION", - "RETURN", - "BREAK", - "GOTO", - "REVERT", - "BROWSE", - "GRANT", - "REVOKE", - "BULK", - "GROUP", - "RIGHT", - "BY", - "HAVING", - "ROLLBACK", - "CASCADE", - "HOLDLOCK", - "ROWCOUNT", - "CASE", - "IDENTITY", - "ROWGUIDCOL", - "CHECK", - "IDENTITY_INSERT", - "RULE", - "CHECKPOINT", - "IDENTITYCOL", - "SAVE", - "CLOSE", - "IF", - "SCHEMA", - "CLUSTERED", - "IN", - "SECURITYAUDIT", - "COALESCE", - "INDEX", - "SELECT", - "COLLATE", - "INNER", - "SEMANTICKEYPHRASETABLE", - "COLUMN", - "INSERT", - "SEMANTICSIMILARITYDETAILSTABLE", - "COMMIT", - "INTERSECT", - "SEMANTICSIMILARITYTABLE", - "COMPUTE", - "INTO", - "SESSION_USER", - "CONSTRAINT", - "IS", - "SET", - "CONTAINS", - "JOIN", - "SETUSER", - "CONTAINSTABLE", - "KEY", - "SHUTDOWN", - "CONTINUE", - "KILL", - "SOME", - "CONVERT", - "LEFT", - "STATISTICS", - "CREATE", - "LIKE", - "SYSTEM_USER", - "CROSS", - "LINENO", - "TABLE", - "CURRENT", - "LOAD", - "TABLESAMPLE", - "CURRENT_DATE", - "MERGE", - "TEXTSIZE", - "CURRENT_TIME", - "NATIONAL", - "THEN", - "CURRENT_TIMESTAMP", - "NOCHECK", - "TO", - "CURRENT_USER", - "NONCLUSTERED", - "TOP", - "CURSOR", - "NOT", - "TRAN", - "DATABASE", - "NULL", - "TRANSACTION", - "DBCC", - "NULLIF", - "TRIGGER", - "DEALLOCATE", - "OF", - "TRUNCATE", - "DECLARE", - "OFF", - "TRY_CONVERT", - "DEFAULT", - "OFFSETS", - "TSEQUAL", - "DELETE", - "ON", - "UNION", - "DENY", - "OPEN", - "UNIQUE", - "DESC", - "OPENDATASOURCE", - "UNPIVOT", - "DISK", - "OPENQUERY", - "UPDATE", - "DISTINCT", - "OPENROWSET", - "UPDATETEXT", - "DISTRIBUTED", - "OPENXML", - "USE", - "DOUBLE", - "OPTION", - "USER", - "DROP", - "OR", - "VALUES", - "DUMP", - "ORDER", - "VARYING", - "ELSE", - "OUTER", - "VIEW", - "END", - "OVER", - "WAITFOR", - "ERRLVL", - "PERCENT", - "WHEN", - "ESCAPE", - "PIVOT", - "WHERE", - "EXCEPT", - "PLAN", - "WHILE", - "EXEC", - "PRECISION", - "WITH", - "EXECUTE", - "PRIMARY", - "WITHIN GROUP", - "EXISTS", - "PRINT", - "WRITETEXT", - "EXIT", - "PROC", - "ABSOLUTE", - "OVERLAPS", - "ACTION", - "PAD", - "ADA", - "PARTIAL", - "PASCAL", - "EXTRACT", - "POSITION", - "ALLOCATE", - "FALSE", - "PREPARE", - "FIRST", - "PRESERVE", - "FLOAT", - "ARE", - "PRIOR", - "PRIVILEGES", - "FORTRAN", - "ASSERTION", - "FOUND", - "AT", - "REAL", - "AVG", - "GET", - "GLOBAL", - "RELATIVE", - "GO", - "BIT", - "BIT_LENGTH", - "BOTH", - "ROWS", - "HOUR", - "CASCADED", - "SCROLL", - "IMMEDIATE", - "SECOND", - "CAST", - "SECTION", - "CATALOG", - "INCLUDE", - "CHAR", - "SESSION", - "CHAR_LENGTH", - "INDICATOR", - "CHARACTER", - "INITIALLY", - "CHARACTER_LENGTH", - "SIZE", - "INPUT", - "SMALLINT", - "INSENSITIVE", - "SPACE", - "INT", - "SQL", - "COLLATION", - "INTEGER", - "SQLCA", - "SQLCODE", - "INTERVAL", - "SQLERROR", - "CONNECT", - "SQLSTATE", - "CONNECTION", - "SQLWARNING", - "ISOLATION", - "SUBSTRING", - "CONSTRAINTS", - "SUM", - "LANGUAGE", - "CORRESPONDING", - "LAST", - "TEMPORARY", - "COUNT", - "LEADING", - "TIME", - "LEVEL", - "TIMESTAMP", - "TIMEZONE_HOUR", - "LOCAL", - "TIMEZONE_MINUTE", - "LOWER", - "MATCH", - "TRAILING", - "MAX", - "MIN", - "TRANSLATE", - "DATE", - "MINUTE", - "TRANSLATION", - "DAY", - "MODULE", - "TRIM", - "MONTH", - "TRUE", - "DEC", - "NAMES", - "DECIMAL", - "NATURAL", - "UNKNOWN", - "NCHAR", - "DEFERRABLE", - "NEXT", - "UPPER", - "DEFERRED", - "NO", - "USAGE", - "NONE", - "USING", - "DESCRIBE", - "VALUE", - "DESCRIPTOR", - 
"DIAGNOSTICS", - "NUMERIC", - "VARCHAR", - "DISCONNECT", - "OCTET_LENGTH", - "DOMAIN", - "ONLY", - "WHENEVER", - "WORK", - "END-EXEC", - "WRITE", - "YEAR", - "OUTPUT", - "ZONE", - "EXCEPTION", - "HOST", - "RELEASE", - "ADMIN", - "IGNORE", - "RESULT", - "AFTER", - "RETURNS", - "AGGREGATE", - "ROLE", - "ALIAS", - "INITIALIZE", - "ROLLUP", - "ROUTINE", - "INOUT", - "ROW", - "ARRAY", - "ASENSITIVE", - "SAVEPOINT", - "ASYMMETRIC", - "INTERSECTION", - "SCOPE", - "SEARCH", - "ATOMIC", - "BEFORE", - "ITERATE", - "BINARY", - "SENSITIVE", - "LARGE", - "SEQUENCE", - "BLOB", - "BOOLEAN", - "LATERAL", - "SETS", - "SIMILAR", - "BREADTH", - "LESS", - "CALL", - "CALLED", - "LIKE_REGEX", - "CARDINALITY", - "LIMIT", - "SPECIFIC", - "LN", - "SPECIFICTYPE", - "LOCALTIME", - "SQLEXCEPTION", - "LOCALTIMESTAMP", - "LOCATOR", - "CLASS", - "MAP", - "START", - "CLOB", - "STATE", - "MEMBER", - "STATEMENT", - "COLLECT", - "METHOD", - "STATIC", - "COMPLETION", - "STDDEV_POP", - "CONDITION", - "MOD", - "STDDEV_SAMP", - "MODIFIES", - "STRUCTURE", - "MODIFY", - "SUBMULTISET", - "SUBSTRING_REGEX", - "CONSTRUCTOR", - "SYMMETRIC", - "CORR", - "MULTISET", - "SYSTEM", - "COVAR_POP", - "TERMINATE", - "COVAR_SAMP", - "THAN", - "CUBE", - "NCLOB", - "CUME_DIST", - "NEW", - "CURRENT_CATALOG", - "CURRENT_DEFAULT_TRANSFORM_GROUP", - "CURRENT_PATH", - "CURRENT_ROLE", - "NORMALIZE", - "TRANSLATE_REGEX", - "CURRENT_SCHEMA", - "CURRENT_TRANSFORM_GROUP_FOR_TYPE", - "OBJECT", - "TREAT", - "CYCLE", - "OCCURRENCES_REGEX", - "DATA", - "OLD", - "UESCAPE", - "UNDER", - "OPERATION", - "ORDINALITY", - "UNNEST", - "OUT", - "OVERLAY", - "DEPTH", - "VAR_POP", - "DEREF", - "PARAMETER", - "VAR_SAMP", - "PARAMETERS", - "VARIABLE", - "DESTROY", - "PARTITION", - "DESTRUCTOR", - "PATH", - "WIDTH_BUCKET", - "DETERMINISTIC", - "POSTFIX", - "WITHOUT", - "DICTIONARY", - "PREFIX", - "WINDOW", - "PREORDER", - "WITHIN", - "PERCENT_RANK", - "DYNAMIC", - "PERCENTILE_CONT", - "XMLAGG", - "EACH", - "PERCENTILE_DISC", - "XMLATTRIBUTES", - "ELEMENT", - "POSITION_REGEX", - "XMLBINARY", - "XMLCAST", - "EQUALS", - "XMLCOMMENT", - "EVERY", - "XMLCONCAT", - "RANGE", - "XMLDOCUMENT", - "READS", - "XMLELEMENT", - "FILTER", - "XMLEXISTS", - "RECURSIVE", - "XMLFOREST", - "REF", - "XMLITERATE", - "REFERENCING", - "XMLNAMESPACES", - "FREE", - "REGR_AVGX", - "XMLPARSE", - "FULLTEXTTABLE", - "REGR_AVGY", - "XMLPI", - "FUSION", - "REGR_COUNT", - "XMLQUERY", - "GENERAL", - "REGR_INTERCEPT", - "XMLSERIALIZE", - "REGR_R2", - "XMLTABLE", - "REGR_SLOPE", - "XMLTEXT", - "REGR_SXX", - "XMLVALIDATE", - "GROUPING", - "REGR_SXY", - "HOLD", - "REGR_SYY", -} - -# In ClickHouse, keywords are not reserved. 
-# Ref: https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-keywords -CLICKHOUSE: Set[str] = set() - -# https://docs.pingcap.com/tidb/stable/keywords#keywords -TIDB = { - "ACCOUNT", - "ACTION", - "ADD", - "ADMIN", - "ADVISE", - "AFTER", - "AGAINST", - "AGO", - "ALGORITHM", - "ALL", - "ALTER", - "ALWAYS", - "ANALYZE", - "AND", - "ANY", - "AS", - "ASC", - "ASCII", - "AUTO_ID_CACHE", - "AUTO_INCREMENT", - "AUTO_RANDOM", - "AUTO_RANDOM_BASE", - "AVG", - "AVG_ROW_LENGTH", - "BACKEND", - "BACKUP", - "BACKUPS", - "BEGIN", - "BETWEEN", - "BIGINT", - "BINARY", - "BINDING", - "BINDINGS", - "BINLOG", - "BIT", - "BLOB", - "BLOCK", - "BOOL", - "BOOLEAN", - "BOTH", - "BTREE", - "BUCKETS", - "BUILTINS", - "BY", - "BYTE", - "CACHE", - "CANCEL", - "CAPTURE", - "CASCADE", - "CASCADED", - "CASE", - "CHAIN", - "CHANGE", - "CHAR", - "CHARACTER", - "CHARSET", - "CHECK", - "CHECKPOINT", - "CHECKSUM", - "CIPHER", - "CLEANUP", - "CLIENT", - "CMSKETCH", - "COALESCE", - "COLLATE", - "COLLATION", - "COLUMN", - "COLUMNS", - "COLUMN_FORMAT", - "COMMENT", - "COMMIT", - "COMMITTED", - "COMPACT", - "COMPRESSED", - "COMPRESSION", - "CONCURRENCY", - "CONFIG", - "CONNECTION", - "CONSISTENT", - "CONSTRAINT", - "CONTEXT", - "CONVERT", - "CPU", - "CREATE", - "CROSS", - "CSV_BACKSLASH_ESCAPE", - "CSV_DELIMITER", - "CSV_HEADER", - "CSV_NOT_NULL", - "CSV_NULL", - "CSV_SEPARATOR", - "CSV_TRIM_LAST_SEPARATORS", - "CUME_DIST", - "CURRENT", - "CURRENT_DATE", - "CURRENT_ROLE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "CURRENT_USER", - "CYCLE", - "DATA", - "DATABASE", - "DATABASES", - "DATE", - "DATETIME", - "DAY", - "DAY_HOUR", - "DAY_MICROSECOND", - "DAY_MINUTE", - "DAY_SECOND", - "DDL", - "DEALLOCATE", - "DECIMAL", - "DEFAULT", - "DEFINER", - "DELAYED", - "DELAY_KEY_WRITE", - "DELETE", - "DENSE_RANK", - "DEPTH", - "DESC", - "DESCRIBE", - "DIRECTORY", - "DISABLE", - "DISCARD", - "DISK", - "DISTINCT", - "DISTINCTROW", - "DIV", - "DO", - "DOUBLE", - "DRAINER", - "DROP", - "DUAL", - "DUPLICATE", - "DYNAMIC", - "ELSE", - "ENABLE", - "ENCLOSED", - "ENCRYPTION", - "END", - "ENFORCED", - "ENGINE", - "ENGINES", - "ENUM", - "ERROR", - "ERRORS", - "ESCAPE", - "ESCAPED", - "EVENT", - "EVENTS", - "EVOLVE", - "EXCEPT", - "EXCHANGE", - "EXCLUSIVE", - "EXECUTE", - "EXISTS", - "EXPANSION", - "EXPIRE", - "EXPLAIN", - "EXTENDED", - "FALSE", - "FAULTS", - "FIELDS", - "FILE", - "FIRST", - "FIRST_VALUE", - "FIXED", - "FLOAT", - "FLUSH", - "FOLLOWING", - "FOR", - "FORCE", - "FOREIGN", - "FORMAT", - "FROM", - "FULL", - "FULLTEXT", - "FUNCTION", - "GENERAL", - "GENERATED", - "GLOBAL", - "GRANT", - "GRANTS", - "GROUP", - "GROUPS", - "HASH", - "HAVING", - "HIGH_PRIORITY", - "HISTORY", - "HOSTS", - "HOUR", - "HOUR_MICROSECOND", - "HOUR_MINUTE", - "HOUR_SECOND", - "IDENTIFIED", - "IF", - "IGNORE", - "IMPORT", - "IMPORTS", - "IN", - "INCREMENT", - "INCREMENTAL", - "INDEX", - "INDEXES", - "INFILE", - "INNER", - "INSERT", - "INSERT_METHOD", - "INSTANCE", - "INT", - "INT1", - "INT2", - "INT3", - "INT4", - "INT8", - "INTEGER", - "INTERVAL", - "INTO", - "INVISIBLE", - "INVOKER", - "IO", - "IPC", - "IS", - "ISOLATION", - "ISSUER", - "KEY", - "KEYS", - "KEY_BLOCK_SIZE", - "KILL", - "LABELS", - "LAG",
- "LANGUAGE", - "LAST", - "LASTVAL", - "LAST_BACKUP", - "LAST_VALUE", - "LEAD", - "LEADING", - "LEFT", - "LESS", - "LEVEL", - "LIKE", - "LIMIT", - "LINEAR", - "LINES", - "LIST", - "LOAD", - "LOCAL", - "LOCALTIME", - "LOCALTIMESTAMP", - "LOCATION", - "LOCK", - "LOGS", - "LONG", - "LONGBLOB", - "LONGTEXT", - "LOW_PRIORITY", - "MASTER", - "MATCH", - "MAXVALUE", - "MAX_CONNECTIONS_PER_HOUR", - "MAX_IDXNUM", - "MAX_MINUTES", - "MAX_QUERIES_PER_HOUR", - "MAX_ROWS", - "MAX_UPDATES_PER_HOUR", - "MAX_USER_CONNECTIONS", - "MB", - "MEDIUMBLOB", - "MEDIUMINT", - "MEDIUMTEXT", - "MEMORY", - "MERGE", - "MICROSECOND", - "MINUTE", - "MINUTE_MICROSECOND", - "MINUTE_SECOND", - "MINVALUE", - "MIN_ROWS", - "MOD", - "MODE", - "MODIFY", - "MONTH", - "NAMES", - "NATIONAL", - "NATURAL", - "NCHAR", - "NEVER", - "NEXT", - "NEXTVAL", - "NO", - "NOCACHE", - "NOCYCLE", - "NODEGROUP", - "NODE_ID", - "NODE_STATE", - "NOMAXVALUE", - "NOMINVALUE", - "NONE", - "NOT", - "NOWAIT", - "NO_WRITE_TO_BINLOG", - "NTH_VALUE", - "NTILE", - "NULL", - "NULLS", - "NUMERIC", - "NVARCHAR", - "OFFSET", - "ON", - "ONLINE", - "ONLY", - "ON_DUPLICATE", - "OPEN", - "OPTIMISTIC", - "OPTIMIZE", - "OPTION", - "OPTIONALLY", - "OR", - "ORDER", - "OUTER", - "OUTFILE", - "OVER", - "PACK_KEYS", - "PAGE", - "PARSER", - "PARTIAL", - "PARTITION", - "PARTITIONING", - "PARTITIONS", - "PASSWORD", - "PERCENT_RANK", - "PER_DB", - "PER_TABLE", - "PESSIMISTIC", - "PLUGINS", - "PRECEDING", - "PRECISION", - "PREPARE", - "PRE_SPLIT_REGIONS", - "PRIMARY", - "PRIVILEGES", - "PROCEDURE", - "PROCESS", - "PROCESSLIST", - "PROFILE", - "PROFILES", - "PUMP", - "QUARTER", - "QUERIES", - "QUERY", - "QUICK", - "RANGE", - "RANK", - "RATE_LIMIT", - "READ", - "REAL", - "REBUILD", - "RECOVER", - "REDUNDANT", - "REFERENCES", - "REGEXP", - "REGION", - "REGIONS", - "RELEASE", - "RELOAD", - "REMOVE", - "RENAME", - "REORGANIZE", - "REPAIR", - "REPEAT", - "REPEATABLE", - "REPLACE", - "REPLICA", - "REPLICATION", - "REQUIRE", - "RESPECT", - "RESTORE", - "RESTORES", - "RESTRICT", - "REVERSE", - "REVOKE", - "RIGHT", - "RLIKE", - "ROLE", - "ROLLBACK", - "ROUTINE", - "ROW", - "ROWS", - "ROW_COUNT", - "ROW_FORMAT", - "ROW_NUMBER", - "RTREE", - "SAMPLES", - "SECOND", - "SECONDARY_ENGINE", - "SECONDARY_LOAD", - "SECONDARY_UNLOAD", - "SECOND_MICROSECOND", - "SECURITY", - "SELECT", - "SEND_CREDENTIALS_TO_TIKV", - "SEPARATOR", - "SEQUENCE", - "SERIAL", - "SERIALIZABLE", - "SESSION", - "SET", - "SETVAL", - "SHARD_ROW_ID_BITS", - "SHARE", - "SHARED", - "SHOW", - "SHUTDOWN", - "SIGNED", - "SIMPLE", - "SKIP_SCHEMA_FILES", - "SLAVE", - "SLOW", - "SMALLINT", - "SNAPSHOT", - "SOME", - "SOURCE", - "SPATIAL", - "SPLIT", - "SQL", - "SQL_BIG_RESULT", - "SQL_BUFFER_RESULT", - "SQL_CACHE", - "SQL_CALC_FOUND_ROWS", - "SQL_NO_CACHE", - "SQL_SMALL_RESULT", - "SQL_TSI_DAY", - "SQL_TSI_HOUR", - "SQL_TSI_MINUTE", - "SQL_TSI_MONTH", - "SQL_TSI_QUARTER", - "SQL_TSI_SECOND", - "SQL_TSI_WEEK", - "SQL_TSI_YEAR", - "SSL", - "START", - "STARTING", - "STATS", - "STATS_AUTO_RECALC", - "STATS_BUCKETS", - "STATS_HEALTHY", - "STATS_HISTOGRAMS", - "STATS_META", - "STATS_PERSISTENT", - "STATS_SAMPLE_PAGES", - "STATUS", - "STORAGE", - "STORED", - "STRAIGHT_JOIN", - "STRICT_FORMAT", - "SUBJECT", - "SUBPARTITION", - "SUBPARTITIONS", - "SUPER", - "SWAPS", - "SWITCHES", - "SYSTEM_TIME", - "TABLE", - "TABLES", - "TABLESPACE", - "TABLE_CHECKSUM", - "TEMPORARY", - "TEMPTABLE", - "TERMINATED", - "TEXT", - "THAN", - "THEN", - "TIDB", - "TIFLASH", - "TIKV_IMPORTER", - "TIME", - "TIMESTAMP", - "TINYBLOB", - "TINYINT", - "TINYTEXT", - "TO", 
- "TOPN", - "TRACE", - "TRADITIONAL", - "TRAILING", - "TRANSACTION", - "TRIGGER", - "TRIGGERS", - "TRUE", - "TRUNCATE", - "TYPE", - "UNBOUNDED", - "UNCOMMITTED", - "UNDEFINED", - "UNICODE", - "UNION", - "UNIQUE", - "UNKNOWN", - "UNLOCK", - "UNSIGNED", - "UPDATE", - "USAGE", - "USE", - "USER", - "USING", - "UTC_DATE", - "UTC_TIME", - "UTC_TIMESTAMP", - "VALIDATION", - "VALUE", - "VALUES", - "VARBINARY", - "VARCHAR", - "VARCHARACTER", - "VARIABLES", - "VARYING", - "VIEW", - "VIRTUAL", - "VISIBLE", - "WARNINGS", - "WEEK", - "WEIGHT_STRING", - "WHEN", - "WHERE", - "WIDTH", - "WINDOW", - "WITH", - "WITHOUT", - "WRITE", - "X", - "X509", - "XOR", - "YEAR", - "YEAR_MONTH", - "ZEROFILL", -} - -# DuckDB uses Sqlite interface: https://www.sqlite.org/lang_keywords.html -DUCKDB = { - "ANALYZE", - "AND", - "AS", - "ASC", - "ATTACH", - "AUTOINCREMENT", - "BEFORE", - "BEGIN", - "BETWEEN", - "BY", - "CASCADE", - "CASE", - "CAST", - "CHECK", - "COLLATE", - "COLUMN", - "COMMIT", - "CONFLICT", - "CONSTRAINT", - "CREATE", - "CROSS", - "CURRENT", - "CURRENT_DATE", - "CURRENT_TIME", - "CURRENT_TIMESTAMP", - "DATABASE", - "DEFAULT", - "DEFERRABLE", - "DEFERRED", - "DELETE", - "DESC", - "DETACH", - "DISTINCT", - "DO", - "DROP", - "EACH", - "ELSE", - "END", - "ESCAPE", - "EXCEPT", - "EXCLUDE", - "EXCLUSIVE", - "EXISTS", - "EXPLAIN", - "FAIL", - "FILTER", - "FIRST", - "FOLLOWING", - "FOR", - "FOREIGN", - "FROM", - "FULL", - "GENERATED", - "GLOB", - "GROUP", - "GROUPS", - "HAVING", - "IF", - "IGNORE", - "IMMEDIATE", - "IN", - "INDEX", - "INDEXED", - "INITIALLY", - "INNER", - "INSERT", - "INSTEAD", - "INTERSECT", - "INTO", - "IS", - "ISNULL", - "JOIN", - "KEY", - "LAST", - "LEFT", - "LIKE", - "LIMIT", - "MATCH", - "MATERIALIZED", - "NATURAL", - "NO", - "NOT", - "NOTHING", - "NOTNULL", - "NULL", - "NULLS", - "OF", - "OFFSET", - "ON", - "OR", - "ORDER", - "OTHERS", - "OUTER", - "OVER", - "PARTITION", - "PLAN", - "PRAGMA", - "PRECEDING", - "PRIMARY", - "QUERY", - "RAISE", - "RANGE", - "RECURSIVE", - "REFERENCES", - "REGEXP", - "REINDEX", - "RELEASE", - "RENAME", - "REPLACE", - "RESTRICT", - "RETURNING", - "RIGHT", - "ROLLBACK", - "ROW", - "ROWS", - "SAVEPOINT", - "SELECT", - "SET", - "TABLE", - "TEMP", - "TEMPORARY", - "THEN", - "TIES", - "TO", - "TRANSACTION", - "TRIGGER", - "UNBOUNDED", - "UNION", - "UNIQUE", - "UPDATE", - "USING", - "VACUUM", - "VALUES", - "VIEW", - "VIRTUAL", - "WHEN", - "WHERE", - "WINDOW", - "WITH", - "WITHOUT", -} - -RESERVED_KEYWORDS = { - DestinationType.BIGQUERY.value: BIGQUERY, - DestinationType.POSTGRES.value: POSTGRES, - DestinationType.REDSHIFT.value: REDSHIFT, - DestinationType.SNOWFLAKE.value: SNOWFLAKE, - DestinationType.MYSQL.value: MYSQL, - DestinationType.ORACLE.value: ORACLE, - DestinationType.MSSQL.value: MSSQL, - DestinationType.CLICKHOUSE.value: CLICKHOUSE, - DestinationType.TIDB.value: TIDB, - DestinationType.DUCKDB.value: DUCKDB, -} - - -def is_reserved_keyword(token: str, integration_type: DestinationType) -> bool: - return token.upper() in RESERVED_KEYWORDS[integration_type.value] diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py deleted file mode 100644 index 6c1f70d6756c2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ /dev/null @@ -1,1530 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-#
-
-
-import os
-import re
-from enum import Enum
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode, SyncMode  # type: ignore
-from jinja2 import Template
-from normalization.destination_type import DestinationType
-from normalization.transform_catalog import dbt_macro
-from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer, transform_json_naming
-from normalization.transform_catalog.table_name_registry import TableNameRegistry
-from normalization.transform_catalog.utils import (
-    is_airbyte_column,
-    is_array,
-    is_big_integer,
-    is_boolean,
-    is_combining_node,
-    is_date,
-    is_datetime,
-    is_datetime_with_timezone,
-    is_datetime_without_timezone,
-    is_long,
-    is_number,
-    is_object,
-    is_simple_property,
-    is_string,
-    is_time,
-    is_time_with_timezone,
-    jinja_call,
-    remove_jinja,
-)
-
-# Using too many columns breaks ephemeral materialization (somewhere between 480 and 490 columns),
-# so use a lower value to stay safely below the limit.
-MAXIMUM_COLUMNS_TO_USE_EPHEMERAL = 450
-
-
-class PartitionScheme(Enum):
-    """
-    When possible, normalization will try to output partitioned/indexed/sorted tables (depending on destination support).
-    This enum specifies which column to use when doing so (which affects how fast the table can be read using that column as a predicate).
-    """
-
-    ACTIVE_ROW = "active_row"  # partition by _airbyte_active_row
-    UNIQUE_KEY = "unique_key"  # partition by _airbyte_emitted_at, sorted by _airbyte_unique_key
-    NOTHING = "nothing"  # no partitions
-    DEFAULT = ""  # partition by _airbyte_emitted_at
-
-
-class TableMaterializationType(Enum):
-    """
-    Defines the folders and dbt materialization mode of models (as configured in the dbt_project.yml file)
-    """
-
-    CTE = "airbyte_ctes"
-    VIEW = "airbyte_views"
-    TABLE = "airbyte_tables"
-    INCREMENTAL = "airbyte_incremental"
-
-
-class StreamProcessor(object):
-    """
-    Takes as input an Airbyte Stream as described in the (configured) Airbyte Catalog's JSON Schema.
-    Associated input raw data is expected to be stored in a staging area table.
-
-    This processor generates SQL models to transform such a stream into a final table in the destination schema.
-    This is done by generating a dbt pipeline of transformations (multiple SQL model queries) that may be materialized
-    in the intermediate schema "raw_schema" (changing the dbt_project.yml settings).
-    The final output data should be written in "schema".
-
-    The pipeline includes transformations such as:
-    - Parsing a JSON blob column and extracting each field property into its own SQL column
-    - Casting each SQL column to the SQL type matching its JSON schema type
-    - Generating an artificial (primary key) ID column based on a hash of the row
-
-    If any nested columns are discovered in the stream, a JSON blob SQL column is created in the top-level parent stream,
-    and a new StreamProcessor instance is spawned for each child substream. These sub-stream processors can then
-    recursively generate models that parse and extract the content of that JSON blob SQL column from their parent
-    StreamProcessor model into separate SQL tables.
- """ - - def __init__( - self, - stream_name: str, - destination_type: DestinationType, - raw_schema: str, - default_schema: str, - schema: str, - source_sync_mode: SyncMode, - destination_sync_mode: DestinationSyncMode, - cursor_field: List[str], - primary_key: List[List[str]], - json_column_name: str, - properties: Dict, - tables_registry: TableNameRegistry, - from_table: Union[str, dbt_macro.Macro], - ): - """ - See StreamProcessor.create() - """ - self.stream_name: str = stream_name - self.destination_type: DestinationType = destination_type - self.raw_schema: str = raw_schema - self.schema: str = schema - self.source_sync_mode: SyncMode = source_sync_mode - self.destination_sync_mode: DestinationSyncMode = destination_sync_mode - self.cursor_field: List[str] = cursor_field - self.primary_key: List[List[str]] = primary_key - self.json_column_name: str = json_column_name - self.properties: Dict = properties - self.tables_registry: TableNameRegistry = tables_registry - self.from_table: Union[str, dbt_macro.Macro] = from_table - - self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type) - self.json_path: List[str] = [stream_name] - self.final_table_name: str = "" - self.sql_outputs: Dict[str, str] = {} - self.parent: Optional["StreamProcessor"] = None - self.is_nested_array: bool = False - self.default_schema: str = default_schema - self.airbyte_ab_id = "_airbyte_ab_id" - self.airbyte_emitted_at = "_airbyte_emitted_at" - self.airbyte_normalized_at = "_airbyte_normalized_at" - self.airbyte_unique_key = "_airbyte_unique_key" - self.models_to_source: Dict[str, str] = {} - - @staticmethod - def create_from_parent( - parent, child_name: str, json_column_name: str, properties: Dict, is_nested_array: bool, from_table: str - ) -> "StreamProcessor": - """ - @param parent is the Stream Processor that originally created this instance to handle a nested column from that parent table. - - @param json_column_name is the name of the column in the parent data table containing the json column to transform - @param properties is the json schema description of this nested stream - @param is_nested_array is a boolean flag specifying if the child is a nested array that needs to be extracted - - @param tables_registry is the global context recording all tables created so far - @param from_table is the parent table to extract the nested stream from - - The child stream processor will create a separate table to contain the unnested data. 
- """ - if parent.destination_sync_mode.value == DestinationSyncMode.append_dedup.value: - # nested streams can't be deduped like their parents (as they may not share the same cursor/primary keys) - parent_sync_mode = DestinationSyncMode.append - else: - parent_sync_mode = parent.destination_sync_mode - result = StreamProcessor.create( - stream_name=child_name, - destination_type=parent.destination_type, - raw_schema=parent.raw_schema, - default_schema=parent.default_schema, - schema=parent.schema, - source_sync_mode=parent.source_sync_mode, - destination_sync_mode=parent_sync_mode, - cursor_field=[], - primary_key=[], - json_column_name=json_column_name, - properties=properties, - tables_registry=parent.tables_registry, - from_table=from_table, - ) - result.parent = parent - result.is_nested_array = is_nested_array - result.json_path = parent.json_path + [child_name] - return result - - @staticmethod - def create( - stream_name: str, - destination_type: DestinationType, - raw_schema: str, - default_schema: str, - schema: str, - source_sync_mode: SyncMode, - destination_sync_mode: DestinationSyncMode, - cursor_field: List[str], - primary_key: List[List[str]], - json_column_name: str, - properties: Dict, - tables_registry: TableNameRegistry, - from_table: Union[str, dbt_macro.Macro], - ) -> "StreamProcessor": - """ - @param stream_name of the stream being processed - - @param destination_type is the destination type of warehouse - @param raw_schema is the name of the staging intermediate schema where to create internal tables/views - @param schema is the name of the schema where to store the final tables where to store the transformed data - - @param source_sync_mode is describing how source are producing data - @param destination_sync_mode is describing how destination should handle the new data batch - @param cursor_field is the field to use to determine order of records - @param primary_key is a list of fields to use as a (composite) primary key - - @param json_column_name is the name of the column in the raw data table containing the json column to transform - @param properties is the json schema description of this stream - - @param tables_registry is the global context recording all tables created so far - @param from_table is the table this stream is being extracted from originally - """ - return StreamProcessor( - stream_name, - destination_type, - raw_schema, - default_schema, - schema, - source_sync_mode, - destination_sync_mode, - cursor_field, - primary_key, - json_column_name, - properties, - tables_registry, - from_table, - ) - - def collect_table_names(self): - column_names = self.extract_column_names() - self.tables_registry.register_table(self.get_schema(True), self.get_schema(False), self.stream_name, self.json_path) - for child in self.find_children_streams(self.from_table, column_names): - child.collect_table_names() - - def get_stream_source(self): - if not self.parent: - return self.from_table.source_name + "." + self.from_table.table_name - cur = self.parent - while cur.parent: - cur = cur.parent - return cur.from_table.source_name + "." + cur.from_table.table_name - - def process(self) -> List["StreamProcessor"]: - """ - See description of StreamProcessor class. 
- @return List of StreamProcessor to handle recursively nested columns from this stream - """ - # Check properties - if not self.properties: - print(f" Ignoring stream '{self.stream_name}' from {self.current_json_path()} because properties list is empty") - return [] - - column_names = self.extract_column_names() - column_count = len(column_names) - - if column_count == 0: - print(f" Ignoring stream '{self.stream_name}' from {self.current_json_path()} because no columns were identified") - return [] - - from_table = str(self.from_table) - # Transformation Pipeline for this stream - from_table = self.add_to_outputs( - self.generate_json_parsing_model(from_table, column_names), - self.get_model_materialization_mode(is_intermediate=True), - is_intermediate=True, - suffix="ab1", - ) - from_table = self.add_to_outputs( - self.generate_column_typing_model(from_table, column_names), - self.get_model_materialization_mode(is_intermediate=True, column_count=column_count), - is_intermediate=True, - suffix="ab2", - ) - if self.destination_sync_mode != DestinationSyncMode.append_dedup: - from_table = self.add_to_outputs( - self.generate_id_hashing_model(from_table, column_names), - self.get_model_materialization_mode(is_intermediate=True, column_count=column_count), - is_intermediate=True, - suffix="ab3", - ) - from_table = self.add_to_outputs( - self.generate_final_model(from_table, column_names), - self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), - is_intermediate=False, - ) - else: - if self.is_incremental_mode(self.destination_sync_mode): - # Force different materialization here because incremental scd models rely on star* macros that requires it - if self.destination_type.value == DestinationType.POSTGRES.value: - # because of https://github.com/dbt-labs/docs.getdbt.com/issues/335, we avoid VIEW for postgres - forced_materialization_type = TableMaterializationType.INCREMENTAL - else: - forced_materialization_type = TableMaterializationType.VIEW - else: - forced_materialization_type = TableMaterializationType.CTE - from_table = self.add_to_outputs( - self.generate_id_hashing_model(from_table, column_names), - forced_materialization_type, - is_intermediate=True, - suffix="stg", - ) - - from_table = self.add_to_outputs( - self.generate_scd_type_2_model(from_table, column_names), - self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), - is_intermediate=False, - suffix="scd", - subdir="scd", - unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), - partition_by=PartitionScheme.ACTIVE_ROW, - ) - where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" - # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows - self.add_to_outputs( - self.generate_final_model(from_table, column_names, unique_key=self.get_unique_key()) + where_clause, - self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), - is_intermediate=False, - unique_key=self.get_unique_key(), - partition_by=PartitionScheme.UNIQUE_KEY, - ) - return self.find_children_streams(from_table, column_names) - - def extract_column_names(self) -> Dict[str, Tuple[str, str]]: - """ - Generate a mapping of JSON properties to normalized SQL Column names, handling collisions and avoid duplicate names - - The mapped value to a field property is a tuple where: - - the first value is the normalized "raw" column name - - the 
second value is the normalized quoted column name to be used in a jinja context
-        """
-        fields = []
-        for field in self.properties.keys():
-            if not is_airbyte_column(field):
-                fields.append(field)
-        result = {}
-        field_names = set()
-        for field in fields:
-            field_name = self.name_transformer.normalize_column_name(field, in_jinja=False)
-            field_name_lookup = self.name_transformer.normalize_column_identifier_case_for_lookup(field_name)
-            jinja_name = self.name_transformer.normalize_column_name(field, in_jinja=True)
-            if field_name_lookup in field_names:
-                # TODO handle column name duplicates or collisions deterministically in this stream
-                for i in range(1, 1000):
-                    field_name = self.name_transformer.normalize_column_name(f"{field}_{i}", in_jinja=False)
-                    field_name_lookup = self.name_transformer.normalize_column_identifier_case_for_lookup(field_name)
-                    jinja_name = self.name_transformer.normalize_column_name(f"{field}_{i}", in_jinja=True)
-                    if field_name_lookup not in field_names:
-                        break
-            field_names.add(field_name_lookup)
-            result[field] = (field_name, jinja_name)
-        return result
-
-    def find_children_streams(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> List["StreamProcessor"]:
-        """
-        For each complex-type property, generate a new child StreamProcessor that produces a separate child pipeline.
-        The current stream/table is used as the parent from which to extract data.
-        """
-        properties = self.properties
-        children: List[StreamProcessor] = []
-        for field in properties.keys():
-            children_properties = None
-            is_nested_array = False
-            json_column_name = ""
-            if is_airbyte_column(field):
-                pass
-            elif is_combining_node(properties[field]):
-                # TODO: merge properties of all combinations
-                pass
-            elif "type" not in properties[field] or is_object(properties[field]["type"]):
-                # properties without a 'type' field are treated like properties with 'type' = 'object'
-                children_properties = find_properties_object([], field, properties[field])
-                is_nested_array = False
-                json_column_name = column_names[field][1]
-            elif is_array(properties[field]["type"]) and "items" in properties[field]:
-                quoted_field = column_names[field][1]
-                children_properties = find_properties_object([], field, properties[field]["items"])
-                is_nested_array = True
-                json_column_name = f"unnested_column_value({quoted_field})"
-            if children_properties:
-                for child_key in children_properties:
-                    stream_processor = StreamProcessor.create_from_parent(
-                        parent=self,
-                        child_name=field,
-                        json_column_name=json_column_name,
-                        properties=children_properties[child_key],
-                        is_nested_array=is_nested_array,
-                        from_table=from_table,
-                    )
-                    children.append(stream_processor)
-        return children
-
-    def generate_json_parsing_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any:
-        if self.destination_type == DestinationType.ORACLE:
-            table_alias = ""
-        else:
-            table_alias = "as table_alias"
-        template = Template(
-            """
--- SQL model to parse JSON blob stored in a single column and extract into separate field columns as described by the JSON Schema
--- depends_on: {{ from_table }}
-{{ unnesting_before_query }}
-select
-{%- if parent_hash_id %}
-  {{ parent_hash_id }},
-{%- endif %}
-{%- for field in fields %}
-  {{ field }},
-{%- endfor %}
-  {{ col_ab_id }},
-  {{ col_emitted_at }},
-  {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}
-from {{ from_table }} {{ table_alias }}
-{{ sql_table_comment }}
-{{ unnesting_from }}
-where 1 = 1
-{{ unnesting_where }}
-"""
-        )
-        sql = template.render(
col_ab_id=self.get_ab_id(),
-            col_emitted_at=self.get_emitted_at(),
-            col_normalized_at=self.get_normalized_at(),
-            table_alias=table_alias,
-            unnesting_before_query=self.unnesting_before_query(from_table),
-            parent_hash_id=self.parent_hash_id(),
-            fields=self.extract_json_columns(column_names),
-            from_table=jinja_call(from_table),
-            unnesting_from=self.unnesting_from(),
-            unnesting_where=self.unnesting_where(),
-            sql_table_comment=self.sql_table_comment(),
-        )
-        return sql
-
-    def get_ab_id(self, in_jinja: bool = False):
-        # this is also tied to dbt-project-template/macros/should_full_refresh.sql
-        # as it is needed by the should_full_refresh macro
-        return self.name_transformer.normalize_column_name(self.airbyte_ab_id, in_jinja, False)
-
-    def get_emitted_at(self, in_jinja: bool = False):
-        return self.name_transformer.normalize_column_name(self.airbyte_emitted_at, in_jinja, False)
-
-    def get_normalized_at(self, in_jinja: bool = False):
-        return self.name_transformer.normalize_column_name(self.airbyte_normalized_at, in_jinja, False)
-
-    def get_unique_key(self, in_jinja: bool = False):
-        return self.name_transformer.normalize_column_name(self.airbyte_unique_key, in_jinja, False)
-
-    def extract_json_columns(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]:
-        return [
-            self.extract_json_column(field, self.json_column_name, self.properties[field], column_names[field][0], "table_alias")
-            for field in column_names
-        ]
-
-    @staticmethod
-    def extract_json_column(property_name: str, json_column_name: str, definition: Dict, column_name: str, table_alias: str) -> str:
-        json_path = [property_name]
-        # In some cases, some destinations aren't able to parse the JSON blob using the original property name,
-        # so we make their life easier by using a pre-populated, sanitized column name instead.
- normalized_json_path = [transform_json_naming(property_name)] - table_alias = f"{table_alias}" - if "unnested_column_value" in json_column_name: - table_alias = "" - - json_extract = jinja_call(f"json_extract('{table_alias}', {json_column_name}, {json_path})") - if "type" in definition: - if is_array(definition["type"]): - json_extract = jinja_call(f"json_extract_array({json_column_name}, {json_path}, {normalized_json_path})") - if is_simple_property(definition.get("items", {"type": "object"})): - json_extract = jinja_call(f"json_extract_string_array({json_column_name}, {json_path}, {normalized_json_path})") - elif is_object(definition["type"]): - json_extract = jinja_call(f"json_extract('{table_alias}', {json_column_name}, {json_path}, {normalized_json_path})") - elif is_simple_property(definition): - json_extract = jinja_call(f"json_extract_scalar({json_column_name}, {json_path}, {normalized_json_path})") - - return f"{json_extract} as {column_name}" - - def generate_column_typing_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: - template = Template( - """ --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type --- depends_on: {{ from_table }} -select -{%- if parent_hash_id %} - {{ parent_hash_id }}, -{%- endif %} -{%- for field in fields %} - {{ field }}, -{%- endfor %} - {{ col_ab_id }}, - {{ col_emitted_at }}, - {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }} -from {{ from_table }} -{{ sql_table_comment }} -where 1 = 1 - """ - ) - sql = template.render( - col_ab_id=self.get_ab_id(), - col_emitted_at=self.get_emitted_at(), - col_normalized_at=self.get_normalized_at(), - parent_hash_id=self.parent_hash_id(), - fields=self.cast_property_types(column_names), - from_table=jinja_call(from_table), - sql_table_comment=self.sql_table_comment(), - ) - return sql - - def cast_property_types(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: - return [self.cast_property_type(field, column_names[field][0], column_names[field][1]) for field in column_names] - - def cast_property_type(self, property_name: str, column_name: str, jinja_column: str) -> Any: # noqa: C901 - definition = self.properties[property_name] - if "type" not in definition: - print(f"WARN: Unknown type for column {property_name} at {self.current_json_path()}") - return column_name - elif is_array(definition["type"]): - return column_name - elif is_object(definition["type"]): - sql_type = jinja_call("type_json()") - # Treat simple types from narrower to wider scope type: boolean < integer < number < string - elif is_boolean(definition["type"], definition): - cast_operation = jinja_call(f"cast_to_boolean({jinja_column})") - return f"{cast_operation} as {column_name}" - elif is_big_integer(definition): - sql_type = jinja_call("type_very_large_integer()") - elif is_long(definition["type"], definition): - sql_type = jinja_call("dbt_utils.type_bigint()") - elif is_number(definition["type"]): - sql_type = jinja_call("dbt_utils.type_float()") - elif is_datetime(definition): - if self.destination_type == DestinationType.SNOWFLAKE: - # snowflake uses case when statement to parse timestamp field - # in this case [cast] operator is not needed as data already converted to timestamp type - if is_datetime_without_timezone(definition): - return self.generate_snowflake_timestamp_statement(column_name) - return self.generate_snowflake_timestamp_tz_statement(column_name) - if self.destination_type == DestinationType.MYSQL and 
is_datetime_without_timezone(definition): - # MySQL does not support [cast] and [nullif] functions together - return self.generate_mysql_datetime_format_statement(column_name) - replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") - if self.destination_type.value == DestinationType.MSSQL.value: - # in case of datetime, we don't need to use [cast] function, use try_parse instead. - if is_datetime_with_timezone(definition): - sql_type = jinja_call("type_timestamp_with_timezone()") - else: - sql_type = jinja_call("type_timestamp_without_timezone()") - return f"try_parse({replace_operation} as {sql_type}) as {column_name}" - if self.destination_type == DestinationType.CLICKHOUSE: - return f"parseDateTime64BestEffortOrNull(trim(BOTH '\"' from {replace_operation})) as {column_name}" - # in all other cases - if is_datetime_without_timezone(definition): - sql_type = jinja_call("type_timestamp_without_timezone()") - else: - sql_type = jinja_call("type_timestamp_with_timezone()") - return f"cast({replace_operation} as {sql_type}) as {column_name}" - elif is_date(definition): - if ( - self.destination_type.value == DestinationType.MYSQL.value - or self.destination_type.value == DestinationType.TIDB.value - or self.destination_type.value == DestinationType.DUCKDB.value - ): - # MySQL does not support [cast] and [nullif] functions together - return self.generate_mysql_date_format_statement(column_name) - replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") - if self.destination_type.value == DestinationType.MSSQL.value: - # in case of date, we don't need to use [cast] function, use try_parse instead. - sql_type = jinja_call("type_date()") - return f"try_parse({replace_operation} as {sql_type}) as {column_name}" - if self.destination_type == DestinationType.CLICKHOUSE: - return f"toDate(parseDateTimeBestEffortOrNull(trim(BOTH '\"' from {replace_operation}))) as {column_name}" - # in all other cases - sql_type = jinja_call("type_date()") - return f"cast({replace_operation} as {sql_type}) as {column_name}" - elif is_time(definition): - if is_time_with_timezone(definition): - sql_type = jinja_call("type_time_with_timezone()") - else: - sql_type = jinja_call("type_time_without_timezone()") - if self.destination_type == DestinationType.CLICKHOUSE: - trimmed_column_name = f"trim(BOTH '\"' from {column_name})" - sql_type = f"'{sql_type}'" - return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" - if ( - self.destination_type == DestinationType.MYSQL - or self.destination_type == DestinationType.TIDB - or self.destination_type == DestinationType.DUCKDB - ): - return f'nullif(cast({column_name} as {sql_type}), "") as {column_name}' - replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") - return f"cast({replace_operation} as {sql_type}) as {column_name}" - elif is_string(definition["type"]): - sql_type = jinja_call("dbt_utils.type_string()") - if self.destination_type == DestinationType.CLICKHOUSE: - trimmed_column_name = f"trim(BOTH '\"' from {column_name})" - sql_type = f"'{sql_type}'" - return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" - elif self.destination_type == DestinationType.MYSQL: - # Cast to `text` datatype. 
See https://github.com/airbytehq/airbyte/issues/7994 - sql_type = f"{sql_type}(1024)" - else: - print(f"WARN: Unknown type {definition['type']} for column {property_name} at {self.current_json_path()}") - return column_name - - if self.destination_type == DestinationType.CLICKHOUSE: - return f"accurateCastOrNull({column_name}, '{sql_type}') as {column_name}" - else: - return f"cast({column_name} as {sql_type}) as {column_name}" - - @staticmethod - def generate_mysql_date_format_statement(column_name: str) -> Any: - template = Template( - """ - case when {{column_name}} = '' then NULL - else cast({{column_name}} as date) - end as {{column_name}} - """ - ) - return template.render(column_name=column_name) - - @staticmethod - def generate_mysql_datetime_format_statement(column_name: str) -> Any: - regexp = r"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.*" - template = Template( - """ - case when {{column_name}} regexp '{{regexp}}' THEN STR_TO_DATE(SUBSTR({{column_name}}, 1, 19), '%Y-%m-%dT%H:%i:%S') - else cast(if({{column_name}} = '', NULL, {{column_name}}) as datetime) - end as {{column_name}} - """ - ) - return template.render(column_name=column_name, regexp=regexp) - - @staticmethod - def generate_snowflake_timestamp_tz_statement(column_name: str) -> Any: - """ - Generates snowflake DB specific timestamp case when statement - """ - formats = [ - {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}", "format": "YYYY-MM-DDTHH24:MI:SSTZHTZM"}, - {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}", "format": "YYYY-MM-DDTHH24:MI:SSTZH"}, - { - "regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}", - "format": "YYYY-MM-DDTHH24:MI:SS.FFTZHTZM", - }, - {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}", "format": "YYYY-MM-DDTHH24:MI:SS.FFTZH"}, - ] - template = Template( - """ - case -{% for format_item in formats %} - when {{column_name}} regexp '{{format_item['regex']}}' then to_timestamp_tz({{column_name}}, '{{format_item['format']}}') -{% endfor %} - when {{column_name}} = '' then NULL - else to_timestamp_tz({{column_name}}) - end as {{column_name}} - """ - ) - return template.render(formats=formats, column_name=column_name) - - @staticmethod - def generate_snowflake_timestamp_statement(column_name: str) -> Any: - """ - Generates snowflake DB specific timestamp case when statement - """ - formats = [ - {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}", "format": "YYYY-MM-DDTHH24:MI:SS"}, - {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}", "format": "YYYY-MM-DDTHH24:MI:SS.FF"}, - ] - template = Template( - """ - case -{% for format_item in formats %} - when {{column_name}} regexp '{{format_item['regex']}}' then to_timestamp({{column_name}}, '{{format_item['format']}}') -{% endfor %} - when {{column_name}} = '' then NULL - else to_timestamp({{column_name}}) - end as {{column_name}} - """ - ) - return template.render(formats=formats, column_name=column_name) - - def generate_id_hashing_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: - - template = Template( - """ --- SQL model to build a hash column based on the values of this record --- depends_on: {{ from_table }} -select - {{ '{{' }} dbt_utils.surrogate_key([ -{%- if parent_hash_id %} - {{ parent_hash_id }}, -{%- endif %} -{%- for field in fields %} - {{ field }}, -{%- endfor %} - ]) {{ '}}' }} as {{ hash_id }}, - tmp.* -from {{ from_table }} tmp -{{ sql_table_comment }} -where 1 = 1 - """ - ) - - sql = template.render( - 
parent_hash_id=self.parent_hash_id(in_jinja=True),
-            fields=self.safe_cast_to_strings(column_names),
-            hash_id=self.hash_id(),
-            from_table=jinja_call(from_table),
-            sql_table_comment=self.sql_table_comment(),
-        )
-        return sql
-
-    def safe_cast_to_strings(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]:
-        return [
-            StreamProcessor.safe_cast_to_string(self.properties[field], column_names[field][1], self.destination_type)
-            for field in column_names
-        ]
-
-    @staticmethod
-    def safe_cast_to_string(definition: Dict, column_name: str, destination_type: DestinationType) -> str:
-        """
-        Note that the result from this static method should always be used within a
-        jinja context (for example, from a jinja surrogate_key macro call).
-
-        The remove_jinja call is necessary for the Oracle database: some columns
-        are created with {{ quote('column_name') }} and the same fields are reused for this
-        operation. Because the quote is injected inside a jinja macro, we need to remove
-        the curly brackets.
-        """
-        if "type" not in definition:
-            col = column_name
-        elif is_boolean(definition["type"], definition):
-            col = f"boolean_to_string({column_name})"
-        elif is_array(definition["type"]):
-            col = f"array_to_string({column_name})"
-        elif is_object(definition["type"]):
-            col = f"object_to_string({column_name})"
-        else:
-            col = column_name
-
-        if destination_type == DestinationType.ORACLE:
-            quote_in_parenthesis = re.compile(r"quote\((.*)\)")
-            return remove_jinja(col) if quote_in_parenthesis.findall(col) else col
-
-        return col
-
-    def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any:
-        """
-        This model pulls data from the ID-hashing model and appends it to a log of record updates. When inserting an update to a record, it also
-        checks whether that record had a previously-existing row in the SCD model; if it does, then that previous row's end_at column is set to
-        the new update's start_at.
-
-        See the docs for more details: https://docs.airbyte.com/understanding-airbyte/basic-normalization#normalization-metadata-columns
-        """
-        cursor_field = self.get_cursor_field(column_names)
-        order_null = f"is null asc,\n {cursor_field} desc"
-        if self.destination_type.value == DestinationType.ORACLE.value:
-            order_null = "desc nulls last"
-        if self.destination_type.value == DestinationType.MSSQL.value:
-            # SQL Server treats NULL values as the lowest values, so NULLs come last when sorting desc.
- order_null = "desc" - - lag_begin = "lag" - lag_end = "" - input_data_table = "input_data" - if self.destination_type == DestinationType.CLICKHOUSE: - # ClickHouse doesn't support lag() yet, this is a workaround solution - # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ - lag_begin = "anyOrNull" - lag_end = " ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" - input_data_table = "input_data_with_active_row_num" - - enable_left_join_null = "" - cast_begin = "cast(" - cast_as = " as " - cast_end = ")" - if self.destination_type == DestinationType.CLICKHOUSE: - enable_left_join_null = "--" - cast_begin = "accurateCastOrNull(" - cast_as = ", '" - cast_end = "')" - - # TODO move all cdc columns out of scd models - cdc_active_row_pattern = "" - cdc_updated_order_pattern = "" - cdc_cols = "" - quoted_cdc_cols = "" - if "_ab_cdc_deleted_at" in column_names.keys(): - col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at") - col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at") - quoted_col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at", in_jinja=True) - quoted_col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at", in_jinja=True) - cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" - cdc_updated_order_pattern = f"\n {col_cdc_updated_at} desc," - cdc_cols = ( - f", {cast_begin}{col_cdc_deleted_at}{cast_as}" - + "{{ dbt_utils.type_string() }}" - + f"{cast_end}" - + f", {cast_begin}{col_cdc_updated_at}{cast_as}" - + "{{ dbt_utils.type_string() }}" - + f"{cast_end}" - ) - quoted_cdc_cols = f", {quoted_col_cdc_deleted_at}, {quoted_col_cdc_updated_at}" - - if "_ab_cdc_log_pos" in column_names.keys(): - col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") - quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) - cdc_updated_order_pattern += f"\n {col_cdc_log_pos} desc," - cdc_cols += "".join([", ", cast_begin, col_cdc_log_pos, cast_as, "{{ dbt_utils.type_string() }}", cast_end]) - quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" - - if "_ab_cdc_lsn" in column_names.keys(): - col_cdc_lsn = self.name_transformer.normalize_column_name("_ab_cdc_lsn") - quoted_col_cdc_lsn = self.name_transformer.normalize_column_name("_ab_cdc_lsn", in_jinja=True) - cdc_updated_order_pattern += f"\n {col_cdc_lsn} desc," - cdc_cols += "".join([", ", cast_begin, col_cdc_lsn, cast_as, "{{ dbt_utils.type_string() }}", cast_end]) - quoted_cdc_cols += f", {quoted_col_cdc_lsn}" - - if ( - self.destination_type == DestinationType.BIGQUERY - and self.get_cursor_field_property_name(column_names) != self.airbyte_emitted_at - and is_number(self.properties[self.get_cursor_field_property_name(column_names)]["type"]) - ): - # partition by float columns is not allowed in BigQuery, cast it to string - airbyte_start_at_string = ( - cast_begin - + self.name_transformer.normalize_column_name("_airbyte_start_at") - + cast_as - + "{{ dbt_utils.type_string() }}" - + cast_end - ) - else: - airbyte_start_at_string = self.name_transformer.normalize_column_name("_airbyte_start_at") - - jinja_variables = { - "active_row": self.name_transformer.normalize_column_name("_airbyte_active_row"), - "airbyte_end_at": self.name_transformer.normalize_column_name("_airbyte_end_at"), - "airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num"), - "airbyte_start_at": 
self.name_transformer.normalize_column_name("_airbyte_start_at"), - "airbyte_start_at_string": airbyte_start_at_string, - "airbyte_unique_key_scd": self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), - "cdc_active_row": cdc_active_row_pattern, - "cdc_cols": cdc_cols, - "cdc_updated_at_order": cdc_updated_order_pattern, - "col_ab_id": self.get_ab_id(), - "col_emitted_at": self.get_emitted_at(), - "col_normalized_at": self.get_normalized_at(), - "cursor_field": cursor_field, - "enable_left_join_null": enable_left_join_null, - "fields": self.list_fields(column_names), - "from_table": from_table, - "hash_id": self.hash_id(), - "incremental_clause": self.get_incremental_clause("this"), - "input_data_table": input_data_table, - "lag_begin": lag_begin, - "lag_end": lag_end, - "order_null": order_null, - "parent_hash_id": self.parent_hash_id(), - "primary_key_partition": self.get_primary_key_partition(column_names), - "primary_keys": self.list_primary_keys(column_names), - "quoted_airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num", in_jinja=True), - "quoted_airbyte_start_at": self.name_transformer.normalize_column_name("_airbyte_start_at", in_jinja=True), - "quoted_cdc_cols": quoted_cdc_cols, - "quoted_col_emitted_at": self.get_emitted_at(in_jinja=True), - "quoted_unique_key": self.get_unique_key(in_jinja=True), - "sql_table_comment": self.sql_table_comment(include_from_table=True), - "unique_key": self.get_unique_key(), - } - if self.destination_type == DestinationType.CLICKHOUSE: - clickhouse_active_row_sql = Template( - """ -input_data_with_active_row_num as ( - select *, - row_number() over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} - {{ col_emitted_at }} desc - ) as _airbyte_active_row_num - from input_data -),""" - ).render(jinja_variables) - jinja_variables["clickhouse_active_row_sql"] = clickhouse_active_row_sql - scd_columns_sql = Template( - """ - case when _airbyte_active_row_num = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, - {{ lag_begin }}({{ cursor_field }}) over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} - {{ col_emitted_at }} desc - {{ lag_end }}) as {{ airbyte_end_at }}""" - ).render(jinja_variables) - jinja_variables["scd_columns_sql"] = scd_columns_sql - else: - scd_columns_sql = Template( - """ - lag({{ cursor_field }}) over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} - {{ col_emitted_at }} desc - ) as {{ airbyte_end_at }}, - case when row_number() over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} - {{ col_emitted_at }} desc - ) = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}""" - ).render(jinja_variables) - jinja_variables["scd_columns_sql"] = scd_columns_sql - sql = Template( - """ --- depends_on: {{ from_table }} -with -{{ '{% if is_incremental() %}' }} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{'{{'}} {{ from_table }} {{'}}'}} - {{ sql_table_comment }} - where 1 = 1 - {{ incremental_clause }} -), -new_data_ids as ( - -- build a subset of {{ unique_key }} from rows that are new - select distinct - {{ '{{' }} dbt_utils.surrogate_key([ -{%- for primary_key in primary_keys %} - {{ primary_key }}, -{%- 
endfor %} - ]) {{ '}}' }} as {{ unique_key }} - from new_data -), -empty_new_data as ( - -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ '{{' }} star_intersect({{ from_table }}, this, from_alias='inc_data', intersect_alias='this_data') {{ '}}' }} - from {{ '{{ this }}' }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data.{{ unique_key }} = new_data_ids.{{ unique_key }} - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - {{ enable_left_join_null }}left join empty_new_data as inc_data on this_data.{{ col_ab_id }} = inc_data.{{ col_ab_id }} - where {{ active_row }} = 1 -), -input_data as ( - select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from new_data - union all - select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from previous_active_scd_data -), -{{ '{% else %}' }} -input_data as ( - select * - from {{'{{'}} {{ from_table }} {{'}}'}} - {{ sql_table_comment }} -), -{{ '{% endif %}' }} -{{ clickhouse_active_row_sql }} -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select -{%- if parent_hash_id %} - {{ parent_hash_id }}, -{%- endif %} - {{ '{{' }} dbt_utils.surrogate_key([ -{%- for primary_key in primary_keys %} - {{ primary_key }}, -{%- endfor %} - ]) {{ '}}' }} as {{ unique_key }}, -{%- for field in fields %} - {{ field }}, -{%- endfor %} - {{ cursor_field }} as {{ airbyte_start_at }}, - {{ scd_columns_sql }}, - {{ col_ab_id }}, - {{ col_emitted_at }}, - {{ hash_id }} - from {{ input_data_table }} -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by - {{ unique_key }}, - {{ airbyte_start_at_string }}, - {{ col_emitted_at }}{{ cdc_cols }} - order by {{ active_row }} desc, {{ col_ab_id }} - ) as {{ airbyte_row_num }}, - {{ '{{' }} dbt_utils.surrogate_key([ - {{ quoted_unique_key }}, - {{ quoted_airbyte_start_at }}, - {{ quoted_col_emitted_at }}{{ quoted_cdc_cols }} - ]) {{ '}}' }} as {{ airbyte_unique_key_scd }}, - scd_data.* - from scd_data -) -select -{%- if parent_hash_id %} - {{ parent_hash_id }}, -{%- endif %} - {{ unique_key }}, - {{ airbyte_unique_key_scd }}, -{%- for field in fields %} - {{ field }}, -{%- endfor %} - {{ airbyte_start_at }}, - {{ airbyte_end_at }}, - {{ active_row }}, - {{ col_ab_id }}, - {{ col_emitted_at }}, - {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, - {{ hash_id }} -from dedup_data where {{ airbyte_row_num }} = 1 -""" - ).render(jinja_variables) - return sql - - def get_cursor_field_property_name(self, column_names: Dict[str, Tuple[str, str]]) -> str: - if not self.cursor_field: - if "_ab_cdc_updated_at" in column_names.keys(): - return "_ab_cdc_updated_at" - elif "_ab_cdc_log_pos" in column_names.keys(): - return "_ab_cdc_log_pos" - elif "_ab_cdc_lsn" in column_names.keys(): - return "_ab_cdc_lsn" - else: - return self.airbyte_emitted_at - elif len(self.cursor_field) == 1: - return self.cursor_field[0] - else: - raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") 
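
The fallback order above — CDC metadata columns first, then the Airbyte-generated emission timestamp — can be illustrated with a minimal standalone sketch. This is not part of the deleted module; the resolve_cursor_property helper and its inputs are hypothetical stand-ins for the real class state:

from typing import Dict, List, Tuple

# Candidates tried, in order, when no cursor field is configured for the stream.
CDC_CURSOR_CANDIDATES = ["_ab_cdc_updated_at", "_ab_cdc_log_pos", "_ab_cdc_lsn"]

def resolve_cursor_property(cursor_field: List[str], column_names: Dict[str, Tuple[str, str]]) -> str:
    if not cursor_field:
        # No user-configured cursor: prefer CDC metadata columns, then fall back
        # to the Airbyte-generated emission timestamp.
        for candidate in CDC_CURSOR_CANDIDATES:
            if candidate in column_names:
                return candidate
        return "_airbyte_emitted_at"
    if len(cursor_field) == 1:
        return cursor_field[0]
    # Nested cursor paths are rejected, mirroring the ValueError raised above.
    raise ValueError(f"Unsupported nested cursor field {'.'.join(cursor_field)}")

# A CDC stream with no explicit cursor resolves to its CDC update timestamp:
assert resolve_cursor_property([], {"_ab_cdc_updated_at": ("col", "col")}) == "_ab_cdc_updated_at"
# A plain stream with no explicit cursor falls back to the emission timestamp:
assert resolve_cursor_property([], {"id": ("id", "id")}) == "_airbyte_emitted_at"
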
- - def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: bool = False) -> str: - if not self.cursor_field: - cursor = self.name_transformer.normalize_column_name(self.get_cursor_field_property_name(column_names), in_jinja) - elif len(self.cursor_field) == 1: - if not is_airbyte_column(self.cursor_field[0]): - cursor = column_names[self.cursor_field[0]][0] - else: - # using an airbyte generated column - cursor = self.cursor_field[0] - else: - raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") - return cursor - - def list_primary_keys(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: - primary_keys = [] - for key_path in self.primary_key: - if len(key_path) == 1: - primary_keys.append(column_names[key_path[0]][1]) - else: - raise ValueError(f"Unsupported nested path {'.'.join(key_path)} for stream {self.stream_name}") - return primary_keys - - def get_primary_key_partition(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: - if self.primary_key and len(self.primary_key) > 0: - return [self.get_primary_key_from_path(column_names, path) for path in self.primary_key] - else: - raise ValueError(f"No primary key specified for stream {self.stream_name}") - - def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], path: List[str]) -> str: - if path and len(path) == 1: - field = path[0] - if not is_airbyte_column(field): - if "type" in self.properties[field]: - property_type = self.properties[field]["type"] - else: - property_type = "object" - if is_number(property_type) or is_object(property_type): - # some destinations don't handle float columns (or complex types) as primary keys, turn them to string - return f"cast({column_names[field][0]} as {jinja_call('dbt_utils.type_string()')})" - else: - return column_names[field][0] - else: - # using an airbyte generated column - return f"cast({field} as {jinja_call('dbt_utils.type_string()')})" - else: - if path: - raise ValueError(f"Unsupported nested path {'.'.join(path)} for stream {self.stream_name}") - else: - raise ValueError(f"No path specified for stream {self.stream_name}") - - def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "") -> Any: - """ - This is the table that the user actually wants. 
In addition to the columns that the source outputs, it has some additional metadata columns; - see the basic normalization docs for an explanation: https://docs.airbyte.com/understanding-airbyte/basic-normalization#normalization-metadata-columns - """ - template = Template( - """ --- Final base SQL model --- depends_on: {{ from_table }} -select -{%- if parent_hash_id %} - {{ parent_hash_id }}, -{%- endif %} -{%- if unique_key %} - {{ unique_key }}, -{%- endif %} -{%- for field in fields %} - {{ field }}, -{%- endfor %} - {{ col_ab_id }}, - {{ col_emitted_at }}, - {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, - {{ hash_id }} -from {{ from_table }} -{{ sql_table_comment }} -where 1 = 1 - """ - ) - sql = template.render( - col_ab_id=self.get_ab_id(), - col_emitted_at=self.get_emitted_at(), - col_normalized_at=self.get_normalized_at(), - parent_hash_id=self.parent_hash_id(), - fields=self.list_fields(column_names), - hash_id=self.hash_id(), - from_table=jinja_call(from_table), - sql_table_comment=self.sql_table_comment(include_from_table=True), - unique_key=unique_key, - ) - return sql - - @staticmethod - def is_incremental_mode(destination_sync_mode: DestinationSyncMode) -> bool: - return destination_sync_mode.value in [DestinationSyncMode.append.value, DestinationSyncMode.append_dedup.value] - - def add_incremental_clause(self, sql_query: str) -> Any: - template = Template( - """ -{{ sql_query }} -{{ incremental_clause }} - """ - ) - sql = template.render(sql_query=sql_query, incremental_clause=self.get_incremental_clause("this")) - return sql - - def get_incremental_clause(self, tablename: str) -> Any: - return self.get_incremental_clause_for_column(tablename, self.get_emitted_at(in_jinja=True)) - - def get_incremental_clause_for_column(self, tablename: str, column: str) -> Any: - return "{{ incremental_clause(" + column + ", " + tablename + ") }}" - - @staticmethod - def list_fields(column_names: Dict[str, Tuple[str, str]]) -> List[str]: - return [column_names[field][0] for field in column_names] - - def add_to_outputs( - self, - sql: str, - materialization_mode: TableMaterializationType, - is_intermediate: bool = True, - suffix: str = "", - unique_key: str = "", - subdir: str = "", - partition_by: PartitionScheme = PartitionScheme.DEFAULT, - ) -> str: - # Explicit function so that we can have type hints to satisfy the linter - def wrap_in_quotes(s: str) -> str: - return '"' + s + '"' - - schema = self.get_schema(is_intermediate) - # MySQL table names need to be manually truncated, because it does not do it automatically - truncate_name = ( - self.destination_type == DestinationType.MYSQL - or self.destination_type == DestinationType.TIDB - or self.destination_type == DestinationType.DUCKDB - ) - table_name = self.tables_registry.get_table_name(schema, self.json_path, self.stream_name, suffix, truncate_name) - file_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, suffix, truncate_name) - file = f"{file_name}.sql" - output = os.path.join(materialization_mode.value, subdir, self.schema, file) - config = self.get_model_partition_config(partition_by, unique_key) - if file_name != table_name: - # The alias() macro configs a model's final table name. 
- config["alias"] = f'"{table_name}"' - if self.destination_type == DestinationType.ORACLE: - # oracle does not allow changing schemas - config["schema"] = f'"{self.default_schema}"' - else: - config["schema"] = f'"{schema}"' - if self.is_incremental_mode(self.destination_sync_mode): - stg_schema = self.get_schema(True) - stg_table = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "stg", truncate_name) - if self.name_transformer.needs_quotes(stg_table): - stg_table = jinja_call(self.name_transformer.apply_quote(stg_table)) - if suffix == "scd": - hooks = [] - - final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) - active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") - clickhouse_nullable_join_setting = "" - if self.destination_type == DestinationType.CLICKHOUSE: - # Clickhouse has special delete syntax - delete_statement = "alter table {{ final_table_relation }} delete" - unique_key_reference = self.get_unique_key(in_jinja=False) - noop_delete_statement = "alter table {{ this }} delete where 1=0" - # Without this, our LEFT JOIN would return empty string for non-matching rows, so our COUNT would include those rows. - # We want to exclude them (this is the default behavior in other DBs) so we have to set join_use_nulls=1 - clickhouse_nullable_join_setting = "SETTINGS join_use_nulls=1" - elif self.destination_type == DestinationType.BIGQUERY: - # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; - # it requires "delete from project.schema.table table_alias where table_alias.column in" - delete_statement = "delete from {{ final_table_relation }} final_table" - unique_key_reference = "final_table." + self.get_unique_key(in_jinja=False) - noop_delete_statement = "delete from {{ this }} where 1=0" - else: - delete_statement = "delete from {{ final_table_relation }}" - unique_key_reference = "{{ final_table_relation }}." + self.get_unique_key(in_jinja=False) - noop_delete_statement = "delete from {{ this }} where 1=0" - deletion_hook = Template( - """ - {{ '{%' }} - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='{{ final_table_name }}' - ) - {{ '%}' }} - {{ '{#' }} - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - {{ '#}' }} - {{ '{%' }} - if final_table_relation is not none and {{ quoted_unique_key }} in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - {{ '%}' }} - - -- Delete records which are no longer active: - -- This query is equivalent, but the left join version is more performant: - -- delete from final_table where unique_key in ( - -- select unique_key from scd_table where 1 = 1 - -- ) and unique_key not in ( - -- select unique_key from scd_table where active_row = 1 - -- ) - -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD - -- entries that were _updated_ recently. This is because a deleted record will have an SCD record - -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
- {{ delete_statement }} where {{ unique_key_reference }} in ( - select recent_records.unique_key - from ( - select distinct {{ unique_key }} as unique_key - from {{ '{{ this }}' }} - where 1=1 {{ normalized_at_incremental_clause }} - ) recent_records - left join ( - select {{ unique_key }} as unique_key, count({{ unique_key }}) as active_count - from {{ '{{ this }}' }} - where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} - group by {{ unique_key }} - ) active_counts - on recent_records.unique_key = active_counts.unique_key - where active_count is null or active_count = 0 - ) - {{ '{% else %}' }} - -- We have to have a non-empty query, so just do a noop delete - {{ noop_delete_statement }} - {{ '{% endif %}' }} - """ - ).render( - delete_statement=delete_statement, - noop_delete_statement=noop_delete_statement, - final_table_name=final_table_name, - unique_key=self.get_unique_key(in_jinja=False), - quoted_unique_key=self.get_unique_key(in_jinja=True), - active_row_column_name=active_row_column_name, - normalized_at_incremental_clause=self.get_incremental_clause_for_column( - "{} + '.' + {}".format( - self.name_transformer.apply_quote("this.schema", literal=False), - self.name_transformer.apply_quote(final_table_name), - ), - self.get_normalized_at(in_jinja=True), - ), - unique_key_reference=unique_key_reference, - clickhouse_nullable_join_setting=clickhouse_nullable_join_setting, - ) - hooks.append(deletion_hook) - - if self.destination_type.value == DestinationType.POSTGRES.value: - # Keep only rows with the max emitted_at to keep incremental behavior - hooks.append( - f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", - ) - else: - hooks.append(f"drop view {stg_schema}.{stg_table}") - - config["post_hook"] = "[" + ",".join(map(wrap_in_quotes, hooks)) + "]" - else: - # incremental is handled in the SCD SQL already - sql = self.add_incremental_clause(sql) - elif self.destination_sync_mode == DestinationSyncMode.overwrite: - if suffix == "" and not is_intermediate: - # drop SCD table after creating the destination table - scd_table_name = self.tables_registry.get_table_name(schema, self.json_path, self.stream_name, "scd", truncate_name) - print(f" Adding drop table hook for {scd_table_name} to {file_name}") - hooks = [ - Template( - """ - {{ '{%' }} - set scd_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='{{ scd_table_name }}' - ) - {{ '%}' }} - {{ '{%' }} - if scd_table_relation is not none - {{ '%}' }} - {{ '{%' }} - do adapter.drop_relation(scd_table_relation) - {{ '%}' }} - {{ '{% endif %}' }} - """ - ).render(scd_table_name=scd_table_name) - ] - config["post_hook"] = "[" + ",".join(map(wrap_in_quotes, hooks)) + "]" - template = Template( - """ -{{ '{{' }} config( -{%- for key in config %} - {{ key }} = {{ config[key] }}, -{%- endfor %} - tags = [ {{ tags }} ] -) {{ '}}' }} -{{ sql }} - """ - ) - - self.sql_outputs[output] = template.render(config=config, sql=sql, tags=self.get_model_tags(is_intermediate)) - json_path = self.current_json_path() - print(f" Generating {output} from {json_path}") - self.models_to_source[file_name] = self.get_stream_source() - return str(dbt_macro.Ref(file_name)) - - def get_model_materialization_mode(self, is_intermediate: bool, column_count: int = 0) -> TableMaterializationType: - if is_intermediate: - if column_count <= MAXIMUM_COLUMNS_TO_USE_EPHEMERAL: - return 
TableMaterializationType.CTE
-            else:
-                # dbt throws a "maximum recursion depth exceeded" exception at runtime
-                # if ephemeral is used with a large number of columns, so use views instead
-                return TableMaterializationType.VIEW
-        else:
-            if self.is_incremental_mode(self.destination_sync_mode):
-                return TableMaterializationType.INCREMENTAL
-            else:
-                return TableMaterializationType.TABLE
-
-    def get_model_partition_config(self, partition_by: PartitionScheme, unique_key: str) -> Dict:
-        """
-        Defines partition, clustering and unique key parameters for each destination.
-        The goal of these is to make reads more performant.
-
-        In general, we need to do lookups on the last emitted_at column to know whether a record is freshly produced and needs to be
-        incrementally processed or not.
-        But in certain models, such as SCD tables, we also need to retrieve older data to update their type 2 SCD end_dates,
-        so a different partitioning scheme is used to optimize that use case.
-        """
-        config = {}
-        if self.destination_type == DestinationType.BIGQUERY:
-            # see https://docs.getdbt.com/reference/resource-configs/bigquery-configs
-            if partition_by == PartitionScheme.UNIQUE_KEY:
-                config["cluster_by"] = f'["{self.airbyte_unique_key}","{self.airbyte_emitted_at}"]'
-            elif partition_by == PartitionScheme.ACTIVE_ROW:
-                config["cluster_by"] = f'["{self.airbyte_unique_key}_scd","{self.airbyte_emitted_at}"]'
-            else:
-                config["cluster_by"] = f'"{self.airbyte_emitted_at}"'
-            if partition_by == PartitionScheme.ACTIVE_ROW:
-                config["partition_by"] = (
-                    '{"field": "_airbyte_active_row", "data_type": "int64", ' '"range": {"start": 0, "end": 1, "interval": 1}}'
-                )
-            elif partition_by == PartitionScheme.NOTHING:
-                pass
-            else:
-                config["partition_by"] = '{"field": "' + self.airbyte_emitted_at + '", "data_type": "timestamp", "granularity": "day"}'
-        elif self.destination_type == DestinationType.POSTGRES:
-            # see https://docs.getdbt.com/reference/resource-configs/postgres-configs
-            if partition_by == PartitionScheme.ACTIVE_ROW:
-                config["indexes"] = (
-                    "[{'columns':['_airbyte_active_row','"
-                    + self.airbyte_unique_key
-                    + "_scd','"
-                    + self.airbyte_emitted_at
-                    + "'],'type': 'btree'}]"
-                )
-            elif partition_by == PartitionScheme.UNIQUE_KEY:
-                config["indexes"] = "[{'columns':['" + self.airbyte_unique_key + "'],'unique':True}]"
-            else:
-                config["indexes"] = "[{'columns':['" + self.airbyte_emitted_at + "'],'type':'btree'}]"
-        elif self.destination_type == DestinationType.REDSHIFT:
-            # see https://docs.getdbt.com/reference/resource-configs/redshift-configs
-            if partition_by == PartitionScheme.ACTIVE_ROW:
-                config["sort"] = f'["_airbyte_active_row", "{self.airbyte_unique_key}_scd", "{self.airbyte_emitted_at}"]'
-            elif partition_by == PartitionScheme.UNIQUE_KEY:
-                config["sort"] = f'["{self.airbyte_unique_key}", "{self.airbyte_emitted_at}"]'
-            elif partition_by == PartitionScheme.NOTHING:
-                pass
-            else:
-                config["sort"] = f'"{self.airbyte_emitted_at}"'
-        elif self.destination_type == DestinationType.SNOWFLAKE:
-            # see https://docs.getdbt.com/reference/resource-configs/snowflake-configs
-            if partition_by == PartitionScheme.ACTIVE_ROW:
-                config[
-                    "cluster_by"
-                ] = f'["_AIRBYTE_ACTIVE_ROW", "{self.airbyte_unique_key.upper()}_SCD", "{self.airbyte_emitted_at.upper()}"]'
-            elif partition_by == PartitionScheme.UNIQUE_KEY:
-                config["cluster_by"] = f'["{self.airbyte_unique_key.upper()}", "{self.airbyte_emitted_at.upper()}"]'
-            elif partition_by == PartitionScheme.NOTHING:
-                pass
-            else:
-                config["cluster_by"] =
f'["{self.airbyte_emitted_at.upper()}"]' - if unique_key: - config["unique_key"] = f'"{unique_key}"' - elif not self.parent: - # in nested arrays, each element is sharing the same _airbyte_ab_id, so it's not unique - config["unique_key"] = self.get_ab_id(in_jinja=True) - return config - - def get_model_tags(self, is_intermediate: bool) -> str: - tags = "" - if self.parent: - tags += "nested" - else: - tags += "top-level" - if is_intermediate: - tags += "-intermediate" - return f'"{tags}"' - - def get_schema(self, is_intermediate: bool) -> str: - if is_intermediate: - return self.raw_schema - else: - return self.schema - - def current_json_path(self) -> str: - return "/".join(self.json_path) - - def normalized_stream_name(self) -> str: - """ - This is the normalized name of this stream to be used as a table (different as referring it as a column). - Note that it might not be the actual table name in case of collisions with other streams (see actual_table_name)... - """ - return self.name_transformer.normalize_table_name(self.stream_name) - - def sql_table_comment(self, include_from_table: bool = False) -> str: - result = f"-- {self.normalized_stream_name()}" - if len(self.json_path) > 1: - result += f" at {self.current_json_path()}" - if include_from_table: - from_table = jinja_call(self.from_table) - result += f" from {from_table}" - return result - - def hash_id(self, in_jinja: bool = False) -> str: - hash_id_col = f"_airbyte_{self.normalized_stream_name()}_hashid" - if self.parent: - if self.normalized_stream_name().lower() == self.parent.stream_name.lower(): - level = len(self.json_path) - hash_id_col = f"_airbyte_{self.normalized_stream_name()}_{level}_hashid" - - return self.name_transformer.normalize_column_name(hash_id_col, in_jinja) - - # Nested Streams - - def parent_hash_id(self, in_jinja: bool = False) -> str: - if self.parent: - return self.parent.hash_id(in_jinja) - return "" - - def unnesting_before_query(self, from_table: str) -> str: - if self.parent and self.is_nested_array: - parent_stream_name = f"'{self.parent.normalized_stream_name()}'" - quoted_field = self.name_transformer.normalize_column_name(self.stream_name, in_jinja=True) - return jinja_call(f"unnest_cte({from_table}, {parent_stream_name}, {quoted_field})") - return "" - - def unnesting_from(self) -> str: - if self.parent: - if self.is_nested_array: - parent_stream_name = f"'{self.parent.normalized_stream_name()}'" - quoted_field = self.name_transformer.normalize_column_name(self.stream_name, in_jinja=True) - return jinja_call(f"cross_join_unnest({parent_stream_name}, {quoted_field})") - return "" - - def unnesting_where(self) -> str: - if self.parent: - column_name = self.name_transformer.normalize_column_name(self.stream_name) - return f"and {column_name} is not null" - return "" - - -# Static Functions - - -def find_properties_object(path: List[str], field: str, properties) -> Dict[str, Dict]: - """ - This function is trying to look for a nested "properties" node under the current JSON node to - identify all nested objects. 
- - @param path JSON path traversed so far to arrive to this node - @param field is the current field being considered in the Json Tree - @param properties is the child tree of properties of the current field being searched - """ - result = {} - current_path = path + [field] - current = "_".join(current_path) - if isinstance(properties, str) or isinstance(properties, int): - return {} - else: - if "items" in properties: - return find_properties_object(path, field, properties["items"]) - elif "properties" in properties: - # we found a properties object - return {current: properties["properties"]} - elif "type" in properties and is_simple_property(properties): - # we found a basic type - return {current: {}} - elif isinstance(properties, dict): - for key in properties.keys(): - child = find_properties_object(path=current_path, field=key, properties=properties[key]) - if child: - result.update(child) - elif isinstance(properties, list): - for item in properties: - child = find_properties_object(path=current_path, field=field, properties=item) - if child: - result.update(child) - return result diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/table_name_registry.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/table_name_registry.py deleted file mode 100644 index 543554a340a37..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/table_name_registry.py +++ /dev/null @@ -1,376 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -import hashlib -from typing import Dict, List - -from normalization import DestinationType -from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer - -# minimum length of parent name used for nested streams -MINIMUM_PARENT_LENGTH = 10 - - -class NormalizedNameMetadata: - """ - A record of names collected by the TableNameRegistry - """ - - def __init__(self, intermediate_schema: str, schema: str, json_path: List[str], stream_name: str, table_name: str): - self.intermediate_schema: str = intermediate_schema - self.schema: str = schema - self.json_path: List[str] = json_path - self.stream_name: str = stream_name - self.table_name: str = table_name - - -class ConflictedNameMetadata: - """ - A record summary of a name conflict detected and resolved in TableNameRegistry - """ - - def __init__(self, schema: str, json_path: List[str], table_name_conflict: str, table_name_resolved: str): - self.schema: str = schema - self.json_path: List[str] = json_path - self.table_name_conflict: str = table_name_conflict - self.table_name_resolved: str = table_name_resolved - - -class ResolvedNameMetadata: - """ - A record of name collected and resolved by the TableNameRegistry - """ - - def __init__(self, schema: str, table_name: str, file_name: str): - self.schema: str = schema - self.table_name: str = table_name - self.file_name: str = file_name - - -class NormalizedTablesRegistry(Dict[str, List[NormalizedNameMetadata]]): - """ - An intermediate registry used by TableNameRegistry to detect conflicts in table names per schema - """ - - def __init__(self, name_transformer: DestinationNameTransformer): - super(NormalizedTablesRegistry, self).__init__() - self.name_transformer = name_transformer - - def add( - self, intermediate_schema: str, schema: str, json_path: List[str], stream_name: str, table_name: str - ) -> "NormalizedTablesRegistry": - key = self.get_table_key(schema, table_name) - if key not in self: 
-            self[key] = []
-        self[key].append(NormalizedNameMetadata(intermediate_schema, schema, json_path, stream_name, table_name))
-        return self
-
-    def get_table_key(self, schema: str, table_name: str) -> str:
-        return (
-            f"{self.name_transformer.normalize_schema_name(schema, False, False)}."
-            f"{self.name_transformer.normalize_table_name(table_name, False, False)}"
-        )
-
-    def get_value(self, schema: str, table_name: str) -> List[NormalizedNameMetadata]:
-        return self[self.get_table_key(schema, table_name)]
-
-    def has_collisions(self, key: str) -> bool:
-        return len(self[key]) > 1
-
-
-class NormalizedFilesRegistry(Dict[str, List[NormalizedNameMetadata]]):
-    """
-    An intermediate registry used by TableNameRegistry to detect conflicts in file names
-    """
-
-    def __init__(self):
-        super(NormalizedFilesRegistry, self).__init__()
-
-    def add(
-        self, intermediate_schema: str, schema: str, json_path: List[str], stream_name: str, table_name: str
-    ) -> "NormalizedFilesRegistry":
-        if table_name not in self:
-            self[table_name] = []
-        self[table_name].append(NormalizedNameMetadata(intermediate_schema, schema, json_path, stream_name, table_name))
-        return self
-
-    def get_value(self, table_name: str) -> List[NormalizedNameMetadata]:
-        return self[table_name]
-
-    def has_collisions(self, table_name: str) -> bool:
-        return len(self[table_name]) > 1
-
-
-class TableNameRegistry:
-    """
-    A registry object that records the table names being used during the run.
-
-    This registry helps detect naming conflicts/collisions and decide how to resolve them.
-
-    First, we collect all schema/stream_name/json_path entries listed in the catalog to detect any collisions, whether from:
-    - table naming: truncated stream names could conflict with each other within the same destination schema
-    - file naming: dbt uses a global registry of file names without considering schemas, so two tables with the same name in different
-    schemas are valid, but dbt would fail to distinguish them. Thus, file names should be unique within a dbt project (for example,
-    by adding the schema name to the file name when such a collision occurs)
-
-    To do so, we first build a list of "simple" names without dealing with any collisions.
-    Next, we check if/when we encounter naming conflicts. These usually happen when a destination requires a certain naming convention
-    with a limited number of characters, forcing us to truncate names and thereby create collisions.
-
-    In those cases, we resolve collisions with a more complex naming scheme, using a suffix generated from a hash of the full names to
-    keep them short and unique (but hard to remember/use).
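-
-    For example (names taken from the unit-test fixtures later in this change), two top-level streams whose truncated
-    table names collide in the same schema can be resolved to "postgres_has_a_64_ch__inations_are_fine_e5a" and
-    "postgres_has_a_64_ch__inations_are_fine_d2b", where the 3-character suffix comes from a SHA-1 hash of the fully
-    qualified name (see hash_name below).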
- """ - - def __init__(self, destination_type: DestinationType): - """ - @param destination_type is the destination type of warehouse - """ - self.destination_type: DestinationType = destination_type - self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type) - # Simple XXX registry are collecting "simple" XXX names (with potential collisions) - self.simple_file_registry: NormalizedFilesRegistry = NormalizedFilesRegistry() - self.simple_table_registry: NormalizedTablesRegistry = NormalizedTablesRegistry(self.name_transformer) - # Registry is the collision free (resolved) mapping of schema json_path of the stream to the names that should be used - self.registry: Dict[str, ResolvedNameMetadata] = {} - - def register_table(self, intermediate_schema: str, schema: str, stream_name: str, json_path: List[str]): - """ - Record usages of simple table and file names used by each stream (top level and nested) in both - intermediate_schema and schema. - - After going through all streams and sub-streams, we'll be able to find if any collisions are present within - this catalog. - """ - intermediate_schema = self.name_transformer.normalize_schema_name(intermediate_schema, False, False) - schema = self.name_transformer.normalize_schema_name(schema, False, False) - table_name = self.get_simple_table_name(json_path) - self.simple_table_registry.add(intermediate_schema, schema, json_path, stream_name, table_name) - - def get_simple_table_name(self, json_path: List[str]) -> str: - """ - Generates a simple table name, possibly in collisions within this catalog because of truncation - """ - return self.name_transformer.normalize_table_name("_".join(json_path)) - - def resolve_names(self) -> List[ConflictedNameMetadata]: - conflicts = self.resolve_table_names() - self.resolve_file_names() - return conflicts - - def resolve_table_names(self) -> List[ConflictedNameMetadata]: - """ - Build a collision free registry from all schema/stream_name/json_path collected so far. - """ - resolved_keys = [] - # deal with table name collisions within the same schema first. - # file name should be equal to table name here - table_count = 0 - - for key in self.simple_table_registry: - for value in self.simple_table_registry[key]: - table_count += 1 - if self.simple_table_registry.has_collisions(key): - # handle collisions with unique hashed names - table_name = self.get_hashed_table_name(value.schema, value.json_path, value.stream_name, value.table_name) - resolved_keys.append(ConflictedNameMetadata(value.schema, value.json_path, value.table_name, table_name)) - else: - table_name = value.table_name - self.registry[self.get_registry_key(value.intermediate_schema, value.json_path, value.stream_name)] = ResolvedNameMetadata( - value.intermediate_schema, - table_name, - # use table_name as file_name for now - table_name, - ) - self.registry[self.get_registry_key(value.schema, value.json_path, value.stream_name)] = ResolvedNameMetadata( - value.schema, - table_name, - # use table_name as file_name for now - table_name, - ) - self.simple_file_registry.add(value.intermediate_schema, value.schema, value.json_path, value.stream_name, table_name) - registry_size = len(self.registry) - - # Oracle doesnt support namespace and this break this logic. 
- if self.destination_type != DestinationType.ORACLE: - assert (table_count * 2) == registry_size, f"Mismatched number of tables {table_count * 2} vs {registry_size} being resolved" - return resolved_keys - - def resolve_file_names(self): - # deal with file name collisions across schemas and update the file name to use in the registry when necessary - file_count = 0 - for key in self.simple_file_registry: - for value in self.simple_file_registry[key]: - file_count += 1 - if self.simple_file_registry.has_collisions(key): - # handle collisions with unique hashed names including schema - self.registry[ - self.get_registry_key(value.intermediate_schema, value.json_path, value.stream_name) - ] = ResolvedNameMetadata( - value.intermediate_schema, value.table_name, self.resolve_file_name(value.intermediate_schema, value.table_name) - ) - self.registry[self.get_registry_key(value.schema, value.json_path, value.stream_name)] = ResolvedNameMetadata( - value.schema, value.table_name, self.resolve_file_name(value.schema, value.table_name) - ) - registry_size = len(self.registry) - - # Oracle doesnt support namespace and this break this logic. - if self.destination_type != DestinationType.ORACLE: - assert (file_count * 2) == registry_size, f"Mismatched number of tables {file_count * 2} vs {registry_size} being resolved" - - def get_hashed_table_name(self, schema: str, json_path: List[str], stream_name: str, table_name: str) -> str: - """ - Generates a unique table name to avoid collisions within this catalog. - This is using a hash of full names but it is hard to use and remember, so this should be done rarely... - We'd prefer to use "simple" names instead as much as possible. - """ - if len(json_path) == 1: - # collisions on a top level stream name, add a hash of schema + stream name to the (truncated?) table name to make it unique - result = self.name_transformer.normalize_table_name(f"{stream_name}_{hash_json_path([schema] + json_path)}") - else: - # collisions on a nested sub-stream - result = self.name_transformer.normalize_table_name( - get_nested_hashed_table_name(self.name_transformer, schema, json_path, stream_name), False, False - ) - return result - - @staticmethod - def get_registry_key(schema: str, json_path: List[str], stream_name: str) -> str: - """ - Build the key string used to index the registry - """ - return ".".join([schema, "_".join(json_path), stream_name]).lower() - - def resolve_file_name(self, schema: str, table_name: str) -> str: - """ - We prefer to use file_name = table_name when possible... - - When a catalog has ambiguity, we have to fallback and use schema in the file name too - (which might increase a risk of truncate operation and thus collisions that we solve by adding a hash of the full names) - """ - if len(self.simple_file_registry[table_name]) == 1: - # no collisions on file naming - return table_name - else: - max_length = self.name_transformer.get_name_max_length() - # if schema . 
table fits into the destination, we use this naming convention - if len(schema) + len(table_name) + 1 < max_length: - return f"{schema}_{table_name}" - else: - # we have to make sure our filename is unique, use hash of full name - return self.name_transformer.normalize_table_name(f"{schema}_{table_name}_{hash_name(schema + table_name)}") - - def get_schema_name(self, schema: str, json_path: List[str], stream_name: str): - """ - Return the schema name from the registry that should be used for this combination of schema/json_path_to_substream - """ - key = self.get_registry_key(schema, json_path, stream_name) - if key in self.registry: - return self.name_transformer.normalize_schema_name(self.registry[key].schema, False, False) - else: - raise KeyError(f"Registry does not contain an entry for {schema} {json_path} {stream_name}") - - def get_table_name(self, schema: str, json_path: List[str], stream_name: str, suffix: str, truncate: bool = False): - """ - Return the table name from the registry that should be used for this combination of schema/json_path_to_substream - """ - key = self.get_registry_key(schema, json_path, stream_name) - if key in self.registry: - table_name = self.registry[key].table_name - else: - raise KeyError(f"Registry does not contain an entry for {schema} {json_path} {stream_name}") - - if suffix: - norm_suffix = suffix if not suffix or suffix.startswith("_") else f"_{suffix}" - else: - norm_suffix = "" - - conflict = False - conflict_solver = 0 - if stream_name in json_path: - conflict = True - conflict_solver = len(json_path) - - return self.name_transformer.normalize_table_name(f"{table_name}{norm_suffix}", False, truncate, conflict, conflict_solver) - - def get_file_name(self, schema: str, json_path: List[str], stream_name: str, suffix: str, truncate: bool = False): - """ - Return the file name from the registry that should be used for this combination of schema/json_path_to_substream - """ - key = self.get_registry_key(schema, json_path, stream_name) - if key in self.registry: - file_name = self.registry[key].file_name - else: - raise KeyError(f"Registry does not contain an entry for {schema} {json_path} {stream_name}") - if suffix: - norm_suffix = suffix if not suffix or suffix.startswith("_") else f"_{suffix}" - else: - norm_suffix = "" - - conflict = False - conflict_solver = 0 - if stream_name in json_path: - conflict = True - conflict_solver = len(json_path) - - return self.name_transformer.normalize_table_name(f"{file_name}{norm_suffix}", False, truncate, conflict, conflict_solver) - - def to_dict(self, apply_function=(lambda x: x)) -> Dict: - """ - Converts to a pure dict to serialize as json - """ - result = {} - for key in self.registry: - value = self.registry[key] - result[apply_function(key)] = { - apply_function("schema"): apply_function(value.schema), - apply_function("table"): apply_function(value.table_name), - apply_function("file"): apply_function(value.file_name), - } - return result - - -def hash_json_path(json_path: List[str]) -> str: - return hash_name("&airbyte&".join(json_path)) - - -def hash_name(input: str) -> str: - h = hashlib.sha1() - h.update(input.encode("utf-8").lower()) - return h.hexdigest()[:3] - - -def get_nested_hashed_table_name(name_transformer: DestinationNameTransformer, schema: str, json_path: List[str], child: str) -> str: - """ - In normalization code base, we often have to deal with naming for tables, combining informations from: - - parent table: to denote where a table is extracted from (in case of nesting) - - child 
table: in case of nesting, the field name or the original stream name - - extra suffix: normalization is done in multiple transformation steps, each may need to generate separate tables, - so we can add a suffix to distinguish the different transformation steps of a pipeline. - - json path: in terms of parent and nested field names in order to reach the table currently being built - - All these informations should be included (if possible) in the table naming for the user to (somehow) identify and - recognize what data is available there. - """ - parent = "_".join(json_path[:-1]) - max_length = name_transformer.get_name_max_length() - json_path_hash = hash_json_path([schema] + json_path) - norm_parent = parent if not parent else name_transformer.normalize_table_name(parent, False, False) - norm_child = name_transformer.normalize_table_name(child, False, False) - min_parent_length = min(MINIMUM_PARENT_LENGTH, len(norm_parent)) - - # no parent - if not parent: - raise RuntimeError("There is no nested table names without parents") - # if everything fits without truncation, don't truncate anything - elif (len(norm_parent) + len(json_path_hash) + len(norm_child) + 2) < max_length: - return f"{norm_parent}_{json_path_hash}_{norm_child}" - # if everything fits except for the parent, just truncate the parent (still guarantees parent is of length min_parent_length) - elif (min_parent_length + len(json_path_hash) + len(norm_child) + 2) < max_length: - max_parent_length = max_length - len(json_path_hash) - len(norm_child) - 2 - return f"{norm_parent[:max_parent_length]}_{json_path_hash}_{norm_child}" - # otherwise first truncate parent to the minimum length and middle truncate the child too - else: - norm_child_max_length = max_length - len(json_path_hash) - 2 - min_parent_length - trunc_norm_child = name_transformer.truncate_identifier_name(norm_child, norm_child_max_length) - return f"{norm_parent[:min_parent_length]}_{json_path_hash}_{trunc_norm_child}" diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py deleted file mode 100644 index b21acb69b2e39..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import argparse -import os -from typing import Any, Dict - -import yaml -from normalization.destination_type import DestinationType -from normalization.transform_catalog.catalog_processor import CatalogProcessor - - -class TransformCatalog: - """ -To run this transformation: -``` -python3 main_dev_transform_catalog.py \ - --integration-type - --profile-config-dir . 
\ - --catalog integration_tests/catalog.json \ - --out dir \ - --json-column json_blob -``` - """ - - config: dict = {} - DBT_PROJECT = "dbt_project.yml" - - def __init__(self): - self.config = {} - - def run(self, args) -> None: - self.parse(args) - self.process_catalog() - - def parse(self, args) -> None: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--integration-type", type=str, required=True, help="type of integration dialect to use") - parser.add_argument("--profile-config-dir", type=str, required=True, help="path to directory containing DBT profiles.yml") - parser.add_argument("--catalog", nargs="+", type=str, required=True, help="path to Catalog (JSON Schema) file") - parser.add_argument("--out", type=str, required=True, help="path to output generated DBT Models to") - parser.add_argument("--json-column", type=str, required=False, help="name of the column containing the json blob") - parsed_args = parser.parse_args(args) - profiles_yml = read_profiles_yml(parsed_args.profile_config_dir) - self.config = { - "integration_type": parsed_args.integration_type, - "schema": extract_schema(profiles_yml), - "catalog": parsed_args.catalog, - "output_path": parsed_args.out, - "json_column": parsed_args.json_column, - "profile_config_dir": parsed_args.profile_config_dir, - } - - def process_catalog(self) -> None: - destination_type = DestinationType.from_string(self.config["integration_type"]) - schema = self.config["schema"] - output = self.config["output_path"] - json_col = self.config["json_column"] - processor = CatalogProcessor(output_directory=output, destination_type=destination_type) - for catalog_file in self.config["catalog"]: - print(f"Processing {catalog_file}...") - processor.process(catalog_file=catalog_file, json_column_name=json_col, default_schema=schema) - self.update_dbt_project_vars(json_column=self.config["json_column"], models_to_source=processor.models_to_source) - - def update_dbt_project_vars(self, **vars_config: Dict[str, Any]): - filename = os.path.join(self.config["profile_config_dir"], self.DBT_PROJECT) - config = read_yaml_config(filename) - config["vars"] = {**config.get("vars", {}), **vars_config} - write_yaml_config(config, filename) - - -def read_profiles_yml(profile_dir: str) -> Any: - with open(os.path.join(profile_dir, "profiles.yml"), "r") as file: - config = yaml.load(file, Loader=yaml.FullLoader) - obj = config["normalize"]["outputs"]["prod"] - return obj - - -def read_yaml_config(filename: str) -> Dict[str, Any]: - with open(filename, "r") as fp: - config = yaml.safe_load(fp) - if not isinstance(config, dict): - raise RuntimeError("{} does not parse to a dictionary".format(os.path.basename(filename))) - return config - - -def write_yaml_config(config: Dict[str, Any], filename: str): - with open(filename, "w") as fp: - fp.write(yaml.dump(config, sort_keys=False)) - - -def extract_schema(profiles_yml: Dict) -> str: - if "dataset" in profiles_yml: - return str(profiles_yml["dataset"]) - elif "schema" in profiles_yml: - return str(profiles_yml["schema"]) - else: - raise KeyError("No Dataset/Schema defined in profiles.yml") - - -def extract_path(profiles_yml: Dict) -> str: - if "path" in profiles_yml: - return str(profiles_yml["path"]) - else: - raise KeyError("No destination_path defined in profiles.yml") - - -def main(args=None): - TransformCatalog().run(args) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py 
b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py deleted file mode 100644 index 5a9b22788f028..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py +++ /dev/null @@ -1,118 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -from typing import Set, Union - -from normalization.transform_catalog import dbt_macro - - -def jinja_call(command: Union[str, dbt_macro.Macro]) -> str: - return "{{ " + command + " }}" - - -def remove_jinja(command: str) -> str: - return str(command).replace("{{ ", "").replace(" }}", "") - - -def is_string(property_type) -> bool: - return property_type == "string" or "string" in property_type - - -def is_datetime(definition: dict) -> bool: - return ( - is_string(definition["type"]) - and ("format" in definition.keys()) - and (definition["format"] == "date-time" or "date-time" in definition["format"]) - ) - - -def is_datetime_without_timezone(definition: dict) -> bool: - return is_datetime(definition) and definition.get("airbyte_type") == "timestamp_without_timezone" - - -def is_datetime_with_timezone(definition: dict) -> bool: - return is_datetime(definition) and (not definition.get("airbyte_type") or definition.get("airbyte_type") == "timestamp_with_timezone") - - -def is_date(definition: dict) -> bool: - return ( - is_string(definition["type"]) - and ("format" in definition.keys()) - and (definition["format"] == "date" or "date" in definition["format"]) - ) - - -def is_time(definition: dict) -> bool: - return is_string(definition["type"]) and definition.get("format") == "time" - - -def is_time_with_timezone(definition: dict) -> bool: - return is_time(definition) and definition.get("airbyte_type") == "time_with_timezone" - - -def is_time_without_timezone(definition: dict) -> bool: - return is_time(definition) and definition.get("airbyte_type") == "time_without_timezone" - - -def is_number(property_type) -> bool: - if is_string(property_type): - # Handle union type, give priority to wider scope types - return False - return property_type == "number" or "number" in property_type - - -def is_big_integer(definition: dict) -> bool: - return "airbyte_type" in definition and definition["airbyte_type"] == "big_integer" - - -def is_long(property_type, definition: dict) -> bool: - # Check specifically for {type: number, airbyte_type: integer} - if ( - (property_type == "number" or "number" in property_type) - and "airbyte_type" in definition - and definition["airbyte_type"] == "integer" - ): - return True - if is_string(property_type) or is_number(property_type): - # Handle union type, give priority to wider scope types - return False - return property_type == "integer" or "integer" in property_type - - -def is_boolean(property_type, definition: dict) -> bool: - if is_string(property_type) or is_number(property_type) or is_big_integer(definition) or is_long(property_type, definition): - # Handle union type, give priority to wider scope types - return False - return property_type == "boolean" or "boolean" in property_type - - -def is_array(property_type) -> bool: - return property_type == "array" or "array" in property_type - - -def is_object(property_type) -> bool: - return property_type == "object" or "object" in property_type - - -def is_airbyte_column(name: str) -> bool: - return name.startswith("_airbyte_") - - -def is_simple_property(definition: dict) -> bool: - if "type" not in definition: - property_type = "object" - else: - property_type = 
definition["type"] - return ( - is_string(property_type) - or is_big_integer(definition) - or is_long(property_type, definition) - or is_number(property_type) - or is_boolean(property_type, definition) - ) - - -def is_combining_node(properties: dict) -> Set[str]: - return set(properties).intersection({"anyOf", "oneOf", "allOf"}) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/__init__.py b/airbyte-integrations/bases/base-normalization/normalization/transform_config/__init__.py deleted file mode 100644 index 94c00f0d6dd56..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_config/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from normalization.transform_config.transform import TransformConfig - -__all__ = ["TransformConfig"] diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/profile_base.yml b/airbyte-integrations/bases/base-normalization/normalization/transform_config/profile_base.yml deleted file mode 100644 index bcb6af2fd8e2e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_config/profile_base.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Top-level configs that apply to all profiles are set here -config: - partial_parse: true - printer_width: 120 - send_anonymous_usage_stats: false - use_colors: true - -normalize: - target: prod - outputs: - prod: -# type: "" -# database-specific configuration here... - diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py deleted file mode 100644 index 7c14e02f64908..0000000000000 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py +++ /dev/null @@ -1,395 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - - -import argparse -import json -import os -import pkgutil -import socket -import subprocess -from typing import Any, Dict - -import yaml -from normalization.destination_type import DestinationType - - -class TransformConfig: - def run(self, args): - inputs = self.parse(args) - original_config = self.read_json_config(inputs["config"]) - integration_type = inputs["integration_type"] - transformed_config = self.transform(integration_type, original_config) - self.write_yaml_config(inputs["output_path"], transformed_config, "profiles.yml") - if self.is_ssh_tunnelling(original_config): - self.write_ssh_config(inputs["output_path"], original_config, transformed_config) - - @staticmethod - def parse(args): - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--config", type=str, required=True, help="path to original config") - parser.add_argument( - "--integration-type", type=DestinationType, choices=list(DestinationType), required=True, help="type of integration" - ) - parser.add_argument("--out", type=str, required=True, help="path to output transformed config to") - - parsed_args = parser.parse_args(args) - print(str(parsed_args)) - - return { - "config": parsed_args.config, - "integration_type": parsed_args.integration_type, - "output_path": parsed_args.out, - } - - def transform(self, integration_type: DestinationType, config: Dict[str, Any]): - data = pkgutil.get_data(self.__class__.__module__.split(".")[0], "transform_config/profile_base.yml") - if not data: - raise FileExistsError("Failed to load profile_base.yml") - base_profile = yaml.load(data, Loader=yaml.FullLoader) - - transformed_integration_config = { - DestinationType.BIGQUERY.value: self.transform_bigquery, - DestinationType.POSTGRES.value: self.transform_postgres, - DestinationType.REDSHIFT.value: self.transform_redshift, - DestinationType.SNOWFLAKE.value: self.transform_snowflake, - DestinationType.MYSQL.value: self.transform_mysql, - DestinationType.ORACLE.value: self.transform_oracle, - DestinationType.MSSQL.value: self.transform_mssql, - DestinationType.CLICKHOUSE.value: self.transform_clickhouse, - DestinationType.TIDB.value: self.transform_tidb, - DestinationType.DUCKDB.value: self.transform_duckdb, - }[integration_type.value](config) - - # merge pre-populated base_profile with destination-specific configuration. - base_profile["normalize"]["outputs"]["prod"] = transformed_integration_config - - return base_profile - - @staticmethod - def create_file(name, content): - f = open(name, "x") - f.write(content) - f.close() - return os.path.abspath(f.name) - - @staticmethod - def is_ssh_tunnelling(config: Dict[str, Any]) -> bool: - tunnel_methods = ["SSH_KEY_AUTH", "SSH_PASSWORD_AUTH"] - if ( - "tunnel_method" in config.keys() - and "tunnel_method" in config["tunnel_method"] - and config["tunnel_method"]["tunnel_method"].upper() in tunnel_methods - ): - return True - else: - return False - - @staticmethod - def is_port_free(port: int) -> bool: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - try: - s.bind(("localhost", port)) - except Exception as e: - print(f"port {port} unsuitable: {e}") - return False - else: - print(f"port {port} is free") - return True - - @staticmethod - def pick_a_port() -> int: - """ - This function finds a free port, starting with 50001 and adding 1 until we find an open port. 
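-        If the scan somehow reaches the end of the valid port range (65535) without finding a free port, a
-        RuntimeError is raised rather than looping forever.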
- """ - port_to_check = 50001 # just past start of dynamic port range (49152:65535) - while not TransformConfig.is_port_free(port_to_check): - port_to_check += 1 - # error if we somehow hit end of port range - if port_to_check > 65535: - raise RuntimeError("Couldn't find a free port to use.") - return port_to_check - - @staticmethod - def get_ssh_altered_config(config: Dict[str, Any], port_key: str = "port", host_key: str = "host") -> Dict[str, Any]: - """ - This should be called only if ssh tunneling is on. - It will return config with appropriately altered port and host values - """ - # make a copy of config rather than mutate in place - ssh_ready_config = {k: v for k, v in config.items()} - ssh_ready_config[port_key] = TransformConfig.pick_a_port() - ssh_ready_config[host_key] = "localhost" - return ssh_ready_config - - @staticmethod - def transform_bigquery(config: Dict[str, Any]): - print("transform_bigquery") - # https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile - - project_id = config["project_id"] - dataset_id = config["dataset_id"] - - if ":" in config["dataset_id"]: - splits = config["dataset_id"].split(":") - if len(splits) > 2: - raise ValueError("Invalid format for dataset ID (expected at most one colon)") - project_id, dataset_id = splits - if project_id != config["project_id"]: - raise ValueError( - f"Project ID in dataset ID did not match explicitly-provided project ID: {project_id} and {config['project_id']}" - ) - - dbt_config = { - "type": "bigquery", - "project": project_id, - "dataset": dataset_id, - "priority": config.get("transformation_priority", "interactive"), - "threads": 8, - "retries": 3, - } - if "credentials_json" in config: - dbt_config["method"] = "service-account-json" - dbt_config["keyfile_json"] = json.loads(config["credentials_json"]) - else: - dbt_config["method"] = "oauth" - if "dataset_location" in config: - dbt_config["location"] = config["dataset_location"] - return dbt_config - - @staticmethod - def transform_postgres(config: Dict[str, Any]): - print("transform_postgres") - - if TransformConfig.is_ssh_tunnelling(config): - config = TransformConfig.get_ssh_altered_config(config, port_key="port", host_key="host") - - # https://docs.getdbt.com/reference/warehouse-profiles/postgres-profile - dbt_config = { - "type": "postgres", - "host": config["host"], - "user": config["username"], - "pass": config.get("password", ""), - "port": config["port"], - "dbname": config["database"], - "schema": config["schema"], - "threads": 8, - } - - ssl = config.get("ssl") - if ssl: - ssl_mode = config.get("ssl_mode", {"mode": "allow"}) - dbt_config["sslmode"] = ssl_mode.get("mode") - if ssl_mode["mode"] == "verify-ca": - TransformConfig.create_file("ca.crt", ssl_mode["ca_certificate"]) - dbt_config["sslrootcert"] = "ca.crt" - elif ssl_mode["mode"] == "verify-full": - dbt_config["sslrootcert"] = TransformConfig.create_file("ca.crt", ssl_mode["ca_certificate"]) - dbt_config["sslcert"] = TransformConfig.create_file("client.crt", ssl_mode["client_certificate"]) - client_key = TransformConfig.create_file("client.key", ssl_mode["client_key"]) - subprocess.call("openssl pkcs8 -topk8 -inform PEM -in client.key -outform DER -out client.pk8 -nocrypt", shell=True) - dbt_config["sslkey"] = client_key.replace("client.key", "client.pk8") - - return dbt_config - - @staticmethod - def transform_redshift(config: Dict[str, Any]): - print("transform_redshift") - # https://docs.getdbt.com/reference/warehouse-profiles/redshift-profile - dbt_config = { - "type": 
"redshift", - "host": config["host"], - "user": config["username"], - "pass": config["password"], - "port": config["port"], - "dbname": config["database"], - "schema": config["schema"], - "threads": 4, - } - return dbt_config - - @staticmethod - def transform_snowflake(config: Dict[str, Any]): - print("transform_snowflake") - # here account is everything before ".snowflakecomputing.com" as it can include account, region & cloud environment information) - account = config["host"].replace(".snowflakecomputing.com", "").replace("http://", "").replace("https://", "") - # https://docs.getdbt.com/reference/warehouse-profiles/snowflake-profile - # snowflake coerces most of these values to uppercase, but if dbt has them as a different casing it has trouble finding the resources it needs. thus we coerce them to upper. - dbt_config = { - "type": "snowflake", - "account": account, - "user": config["username"].upper(), - "role": config["role"].upper(), - "database": config["database"].upper(), - "warehouse": config["warehouse"].upper(), - "schema": config["schema"].upper(), - "threads": 5, - "client_session_keep_alive": False, - "query_tag": "normalization", - "retry_all": True, - "retry_on_database_errors": True, - "connect_retries": 3, - "connect_timeout": 15, - } - - credentials = config.get("credentials", {}) - if credentials.get("auth_type") == "OAuth2.0": - dbt_config["authenticator"] = "oauth" - dbt_config["oauth_client_id"] = credentials["client_id"] - dbt_config["oauth_client_secret"] = credentials["client_secret"] - dbt_config["token"] = credentials["refresh_token"] - elif credentials.get("private_key"): - with open("private_key_path.txt", "w") as f: - f.write(credentials["private_key"]) - dbt_config["private_key_path"] = "private_key_path.txt" - if credentials.get("private_key_password"): - dbt_config["private_key_passphrase"] = credentials["private_key_password"] - elif credentials.get("password"): - dbt_config["password"] = credentials["password"] - else: - dbt_config["password"] = config["password"] - return dbt_config - - @staticmethod - def transform_mysql(config: Dict[str, Any]): - print("transform_mysql") - - if TransformConfig.is_ssh_tunnelling(config): - config = TransformConfig.get_ssh_altered_config(config, port_key="port", host_key="host") - - # https://github.com/dbeatty10/dbt-mysql#configuring-your-profile - dbt_config = { - # MySQL 8.x - type: mysql - # MySQL 5.x - type: mysql5 - "type": config.get("type", "mysql"), - "server": config["host"], - "port": config["port"], - # DBT schema is equivalent to MySQL database - "schema": config["database"], - "database": config["database"], - "username": config["username"], - "password": config.get("password", ""), - } - return dbt_config - - @staticmethod - def transform_oracle(config: Dict[str, Any]): - print("transform_oracle") - # https://github.com/techindicium/dbt-oracle#configure-your-profile - dbt_config = { - "type": "oracle", - "host": config["host"], - "user": config["username"], - "pass": config["password"], - "port": config["port"], - "dbname": config["sid"], - "schema": config["schema"], - "threads": 4, - } - return dbt_config - - @staticmethod - def transform_mssql(config: Dict[str, Any]): - print("transform_mssql") - # https://docs.getdbt.com/reference/warehouse-profiles/mssql-profile - - if TransformConfig.is_ssh_tunnelling(config): - config = TransformConfig.get_ssh_altered_config(config, port_key="port", host_key="host") - config["host"] = "127.0.0.1" # localhost is not supported by dbt-sqlserver. 
- - dbt_config = { - "type": "sqlserver", - "driver": "ODBC Driver 17 for SQL Server", - "server": config["host"], - "port": config["port"], - "schema": config["schema"], - "database": config["database"], - "user": config["username"], - "password": config["password"], - "threads": 8, - # "authentication": "sql", - # "trusted_connection": True, - } - return dbt_config - - @staticmethod - def transform_clickhouse(config: Dict[str, Any]): - print("transform_clickhouse") - # https://docs.getdbt.com/reference/warehouse-profiles/clickhouse-profile - dbt_config = { - "type": "clickhouse", - "driver": "http", - "verify": False, - "host": config["host"], - "port": config["port"], - "schema": config["database"], - "user": config["username"], - } - if "password" in config: - dbt_config["password"] = config["password"] - - # ssl is an optional configuration and is not present in strict-encrypt config - # if ssl option is not present in the config - default to True - dbt_config["secure"] = config.get("ssl", True) - - return dbt_config - - @staticmethod - def transform_tidb(config: Dict[str, Any]): - print("transform_tidb") - # https://github.com/pingcap/dbt-tidb#profile-configuration - dbt_config = { - "type": "tidb", - "server": config["host"], - "port": config["port"], - "schema": config["database"], - "database": config["database"], - "username": config["username"], - "password": config.get("password", ""), - } - return dbt_config - - @staticmethod - def transform_duckdb(config: Dict[str, Any]): - print("transform_duckdb") - dbt_config = { - "type": "duckdb", - "path": config["destination_path"], - "schema": config["schema"] if "schema" in config else "main", - } - return dbt_config - - @staticmethod - def read_json_config(input_path: str): - with open(input_path, "r") as file: - contents = file.read() - return json.loads(contents) - - @staticmethod - def write_yaml_config(output_path: str, config: Dict[str, Any], filename: str): - if not os.path.exists(output_path): - os.makedirs(output_path) - with open(os.path.join(output_path, filename), "w") as fh: - fh.write(yaml.dump(config)) - - @staticmethod - def write_ssh_config(output_path: str, original_config: Dict[str, Any], transformed_config: Dict[str, Any]): - """ - This function writes a json file with config specific to ssh. - We do this because we need these details to open the ssh tunnel for dbt. - """ - ssh_dict = { - "db_host": original_config["host"], - "db_port": original_config["port"], - "tunnel_map": original_config["tunnel_method"], - "local_port": transformed_config["normalize"]["outputs"]["prod"]["port"], - } - if not os.path.exists(output_path): - os.makedirs(output_path) - with open(os.path.join(output_path, "ssh.json"), "w") as fh: - json.dump(ssh_dict, fh) - - -def main(args=None): - TransformConfig().run(args) diff --git a/airbyte-integrations/bases/base-normalization/oracle.Dockerfile b/airbyte-integrations/bases/base-normalization/oracle.Dockerfile deleted file mode 100644 index 6041ea3cf1c50..0000000000000 --- a/airbyte-integrations/bases/base-normalization/oracle.Dockerfile +++ /dev/null @@ -1,62 +0,0 @@ -# As of today, dbt-oracle doesn't support 1.0.0 -# IF YOU UPGRADE DBT, make sure to also edit these files: -# 1. Remove the "normalization-oracle" entry here https://github.com/airbytehq/airbyte/pull/11267/files#diff-9a3bcae8cb5c56aa30c00548e06eade6ad771f3d4f098f6867ae9a183049dfd8R404 -# 2. 
Check if mysql.Dockerfile is on DBT 1.0.0 yet; if it is, then revert this entire edit https://github.com/airbytehq/airbyte/pull/11267/files#diff-8880e85b2b5690accc6f15f9292a8589a6eb83564803d57c4ee74e2ee8ede09eR117-R130 -FROM fishtownanalytics/dbt:0.19.1 - -USER root -WORKDIR /tmp -RUN apt-get update && apt-get install -y \ - wget \ - unzip \ - libaio-dev \ - libaio1 -RUN mkdir -p /opt/oracle -RUN wget https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-basic-linux.x64-19.6.0.0.0dbru.zip -RUN unzip instantclient-basic-linux.x64-19.6.0.0.0dbru.zip -d /opt/oracle -ENV ORACLE_HOME /opt/oracle/instantclient_19_6 -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME -ENV TNS_ADMIN /opt/oracle/instantclient_19_6/network/admin -RUN pip install cx_Oracle - -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ -COPY dbt-project-template-oracle/* ./dbt-template/ - -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . -# based of https://github.com/techindicium/dbt-oracle/tree/fa9718809840ee73e6072f483233f5150cc9986c -RUN pip install dbt-oracle==0.4.3 - -WORKDIR /airbyte/normalization_code/dbt-template/ - -# Pin MarkupSafe to 2.0.1 per this issue for dbt -# https://github.com/dbt-labs/dbt-core/issues/4745#issuecomment-1044165591 -RUN pip install --force-reinstall MarkupSafe==2.0.1 - -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-oracle diff --git a/airbyte-integrations/bases/base-normalization/redshift.Dockerfile b/airbyte-integrations/bases/base-normalization/redshift.Dockerfile deleted file mode 100644 index 9b8124ebe9ed2..0000000000000 --- a/airbyte-integrations/bases/base-normalization/redshift.Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ -COPY dbt-project-template-redshift/* ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . 
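-# Installing the normalization package also registers the transform-config and transform-catalog
-# console scripts declared in setup.py.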
- -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-redshift diff --git a/airbyte-integrations/bases/base-normalization/requirements.txt b/airbyte-integrations/bases/base-normalization/requirements.txt deleted file mode 100644 index d6e1198b1ab1f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --e . diff --git a/airbyte-integrations/bases/base-normalization/setup.cfg b/airbyte-integrations/bases/base-normalization/setup.cfg deleted file mode 100644 index a7f638916e98b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[aliases] -test='pytest' diff --git a/airbyte-integrations/bases/base-normalization/setup.py b/airbyte-integrations/bases/base-normalization/setup.py deleted file mode 100644 index cf58f57434d83..0000000000000 --- a/airbyte-integrations/bases/base-normalization/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -# Note: cattrs is pinned to the last known working version which does not have conflicts with typing_extensions. Learn more https://airbytehq-team.slack.com/archives/C03C4AVJWG4/p1685546430990049 - -import setuptools - -setuptools.setup( - name="normalization", - description="Normalizes data in the destination.", - author="Airbyte", - author_email="contact@airbyte.io", - url="https://github.com/airbytehq/airbyte", - packages=setuptools.find_packages(), - install_requires=["airbyte-cdk", "pyyaml", "jinja2", "types-PyYAML", "cattrs==22.2.0"], - package_data={"": ["*.yml"]}, - setup_requires=["pytest-runner"], - entry_points={ - "console_scripts": [ - "transform-config=normalization.transform_config.transform:main", - "transform-catalog=normalization.transform_catalog.transform:main", - ], - }, - extras_require={ - "tests": ["airbyte-cdk", "pyyaml", "pytest", "mypy", "types-PyYAML"], - }, -) diff --git a/airbyte-integrations/bases/base-normalization/setup/snowflake.md b/airbyte-integrations/bases/base-normalization/setup/snowflake.md deleted file mode 100644 index b536c67950beb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/setup/snowflake.md +++ /dev/null @@ -1,34 +0,0 @@ -# Snowflake Setup - -## Setting up an integration user - -Here is the SQL to make an integration environment in Snowflake for this source via an ACCOUNTADMIN. Be sure to give a real password. 
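-
-(The `CREATE USER` statement below ships with the placeholder password `'test'`; replace it with a real one before running.)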
- -```sql -CREATE WAREHOUSE INTEGRATION_TEST_WAREHOUSE_NORMALIZATION WITH WAREHOUSE_SIZE = 'XSMALL' WAREHOUSE_TYPE = 'STANDARD' AUTO_SUSPEND = 600 AUTO_RESUME = TRUE; - -CREATE DATABASE INTEGRATION_TEST_NORMALIZATION; - -CREATE ROLE INTEGRATION_TESTER_NORMALIZATION; - -GRANT ALL PRIVILEGES ON WAREHOUSE INTEGRATION_TEST_WAREHOUSE_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION; -GRANT ALL PRIVILEGES ON DATABASE INTEGRATION_TEST_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION; -GRANT ALL PRIVILEGES ON FUTURE SCHEMAS IN DATABASE INTEGRATION_TEST_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION; -GRANT ALL PRIVILEGES ON FUTURE TABLES IN DATABASE INTEGRATION_TEST_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION; - -# Add real password here and remove this comment -CREATE USER INTEGRATION_TEST_USER_NORMALIZATION PASSWORD='test' DEFAULT_ROLE=INTEGRATION_TESTER_NORMALIZATION DEFAULT_WAREHOUSE=INTEGRATION_TEST_WAREHOUSE_NORMALIZATION MUST_CHANGE_PASSWORD=false; - -GRANT ROLE INTEGRATION_TESTER_NORMALIZATION TO USER INTEGRATION_TEST_USER_NORMALIZATION; - -CREATE SCHEMA INTEGRATION_TEST_NORMALIZATION.TEST_SCHEMA; -``` - -If you ever need to start over, use this: - -```sql -DROP DATABASE IF EXISTS INTEGRATION_TEST_NORMALIZATION; -DROP USER IF EXISTS INTEGRATION_TEST_USER_NORMALIZATION; -DROP ROLE IF EXISTS INTEGRATION_TESTER_NORMALIZATION; -DROP WAREHOUSE IF EXISTS INTEGRATION_TEST_WAREHOUSE_NORMALIZATION; -``` diff --git a/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile deleted file mode 100644 index 41d74e50621a4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . -COPY dbt-project-template/ ./dbt-template/ -COPY dbt-project-template-snowflake/* ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . - -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.version=0.2.5 -LABEL io.airbyte.name=airbyte/normalization-snowflake diff --git a/airbyte-integrations/bases/base-normalization/tidb.Dockerfile b/airbyte-integrations/bases/base-normalization/tidb.Dockerfile deleted file mode 100644 index a749f88a66d8c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/tidb.Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -FROM fishtownanalytics/dbt:1.0.0 -COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte - -# Install SSH Tunneling dependencies -RUN apt-get update && apt-get install -y jq sshpass - -WORKDIR /airbyte -COPY entrypoint.sh . -COPY build/sshtunneling.sh . - -WORKDIR /airbyte/normalization_code -COPY normalization ./normalization -COPY setup.py . 
-COPY dbt-project-template/ ./dbt-template/ - -# Install python dependencies -WORKDIR /airbyte/base_python_structs - -# workaround for https://github.com/yaml/pyyaml/issues/601 -# this should be fixed in the airbyte/base-airbyte-protocol-python image -RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation - -RUN pip install . - -WORKDIR /airbyte/normalization_code -RUN pip install . -RUN pip install dbt-tidb==1.0.1 - -WORKDIR /airbyte/normalization_code/dbt-template/ -# Download external dbt dependencies -RUN dbt deps - -WORKDIR /airbyte -ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" -ENTRYPOINT ["/airbyte/entrypoint.sh"] - -LABEL io.airbyte.name=airbyte/normalization-tidb diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/private_key_path.txt b/airbyte-integrations/bases/base-normalization/unit_tests/private_key_path.txt deleted file mode 100644 index 8b98a34afc485..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/private_key_path.txt +++ /dev/null @@ -1 +0,0 @@ -AIRBYTE_PRIVATE_KEY \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json deleted file mode 100644 index 7ffa2f36d4421..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "namespace": "another", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json deleted file mode 100644 index 84f4fa7a50eb6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - 
"_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__destinations_are_fine", - "schema": "_airbyte_another", - "table": "postgres_has_a_64_ch__destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_d2b", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_d2b" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_e5a", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_e5a" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_ch__destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_d2b", - "schema": "schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_d2b" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_e5a", - "schema": "schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_e5a" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_duckdb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_duckdb_names.json deleted file mode 100644 index 160fc5b70b759..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_duckdb_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "_airbyte_another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": 
"_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mssql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mssql_names.json deleted file mode 100644 index 160fc5b70b759..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mssql_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "_airbyte_another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": 
"postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mysql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mysql_names.json deleted file mode 100644 index 160fc5b70b759..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mysql_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "_airbyte_another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_names.json deleted file mode 100644 index 760c94d2803d6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "_airbyte_another_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "schema": "_airbyte_another", - 
"table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "_airbyte_schema_test_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "another_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", - "schema": "schema_test", - "table": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "schema_test_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "schema": "schema_test", - "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_oracle_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_oracle_names.json deleted file mode 100644 index 08417424a6c5b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_oracle_names.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "another_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", - "schema": 
"schema_test", - "table": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "schema_test_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", - "schema": "schema_test", - "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_postgres_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_postgres_names.json deleted file mode 100644 index 84f4fa7a50eb6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_postgres_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__destinations_are_fine", - "schema": "_airbyte_another", - "table": "postgres_has_a_64_ch__destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_d2b", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_d2b" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_e5a", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_e5a" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_ch__destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_d2b", - "schema": "schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_d2b" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_ch__inations_are_fine_e5a", - "schema": "schema_test", - "table": "postgres_has_a_64_ch__inations_are_fine_e5a" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_tidb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_tidb_names.json deleted file mode 100644 index 160fc5b70b759..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_tidb_names.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "_airbyte_another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "_airbyte_schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - }, - "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__destinations_are_fine", - "schema": "another", - "table": "postgres_has_a_64_cha__destinations_are_fine" - }, - "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_d2b", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_d2b" - }, - "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { - "file": "postgres_has_a_64_cha__inations_are_fine_e5a", - "schema": "schema_test", - "table": "postgres_has_a_64_cha__inations_are_fine_e5a" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json deleted file mode 100644 index 94e6b4a798d9a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json +++ /dev/null @@ -1,425 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "adcreatives", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - }, - "body": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "title": { - "type": ["null", "string"] - }, - "status": { - "type": ["null", "string"] - }, - "adlabels": { - "type": ["null", "array"], - "items": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "created_time": { - "type": "string", - "format": "date-time" - }, - "updated_time": { - "type": "string", - "format": "date-time" - } - } - } - }, - "link_url": { - "type": ["null", "string"] - }, - "image_crops": { - "type": ["null", "object"], - "properties": { - "100x72": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - 
"90x160": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "100x100": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "191x100": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "400x150": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "400x500": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "600x360": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - } - } - }, - "object_story_spec": { - "type": ["null", "object"], - "properties": { - "page_id": { - "type": ["null", "string"] - }, - "link_data": { - "type": ["null", "object"], - "properties": { - "link": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "caption": { - "type": ["null", "string"] - }, - "message": { - "type": ["null", "string"] - }, - "image_crops": { - "type": ["null", "object"], - "properties": { - "100x72": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "90x160": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "100x100": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "191x100": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "400x150": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "400x500": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - }, - "600x360": { - "type": ["null", "array"], - "items": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - } - } - } - }, - "app_link_spec": { - "type": ["null", "object"], - "properties": { - "ios": { - "type": ["null", "array"], - "items": { - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "app_name": { - "type": "string" - }, - "app_store_id": { - "type": "string" - } - } - } - }, - "ipad": { - "type": ["null", "array"], - "items": { - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "app_name": { - "type": "string" - }, - "app_store_id": { - "type": "string" - } - } - } - }, - "iphone": { - "type": ["null", "array"], - "items": { - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "app_name": { - "type": "string" - }, - "app_store_id": { - "type": "string" - } - } - } - }, - "android": { - "type": ["null", "array"], - "items": { - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "class": { - "type": "string" - }, - "package": { - "type": "string" - }, - "app_name": { - "type": "string" - } - } - } - } - } - } - }, - "text_data": { - "type": ["null", "object"], - "properties": { - "message": { - "type": "string" - } - } - }, - "photo_data": { - "type": ["null", "object"], - "properties": { - "url": { - "type": ["null", "string"] - }, - "caption": { - 
"type": "string" - }, - "image_hash": { - "type": ["null", "string"] - }, - "page_welcome_message": { - "type": ["null", "string"] - }, - "branded_content_sponsor_page_id": { - "type": ["null", "string"] - }, - "branded_content_sponsor_relationship": { - "type": ["null", "string"] - } - } - }, - "instagram_actor_id": { - "type": ["null", "string"] - } - } - }, - "template_url_spec": { - "type": ["null", "object"], - "properties": { - "ios": { - "type": ["null", "object"], - "properties": { - "url": { - "type": "string" - }, - "app_name": { - "type": "string" - }, - "app_store_id": { - "type": "string" - } - } - }, - "web": { - "type": ["null", "object"], - "properties": { - "url": { - "type": "string" - }, - "should_fallback": { - "type": "string" - } - } - }, - "ipad": { - "type": ["null", "object"], - "properties": { - "url": { - "type": "string" - }, - "app_name": { - "type": "string" - }, - "app_store_id": { - "type": "string" - } - } - }, - "config": { - "type": ["null", "object"], - "properties": { - "app_id": { - "type": "string" - } - } - }, - "iphone": { - "type": ["null", "object"], - "properties": { - "url": { - "type": "string" - }, - "app_name": { - "type": "string" - }, - "app_store_id": { - "type": "string" - } - } - }, - "android": { - "type": ["null", "object"], - "properties": { - "url": { - "type": "string" - }, - "package": { - "type": "string" - }, - "app_name": { - "type": "string" - } - } - }, - "windows_phone": { - "type": ["null", "object"], - "properties": { - "url": { - "type": "string" - }, - "app_id": { - "type": "string" - }, - "app_name": { - "type": "string" - } - } - } - } - } - } - } - }, - "supported_sync_modes": ["full_refresh"], - "source_defined_cursor": false, - "default_cursor_field": [] - }, - "sync_mode": "full_refresh", - "cursor_field": [], - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_bigquery_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_bigquery_names.json deleted file mode 100644 index 9b9c347db4a5a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_bigquery_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - 
"table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_story_spec_link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": 
"adcreatives_object_story_spec_link_data_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec" - }, - 
"schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_story_spec_link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json deleted file mode 100644 index 450b8a7f4bfc8..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_s__nk_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__nk_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_s__app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_s__ata_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ata_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_s__ta_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ta_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": 
"adcreatives_object_s___app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s___app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_s__link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_s__a_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_s__ta_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ta_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_s__a_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_s__a_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_s__a_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_s__a_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_s__ta_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ta_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": 
"adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_s__nk_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_s__nk_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_s__app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_s__app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_s__ata_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_s__ata_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_s__ta_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_s__ta_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_s___app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_s___app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_s__link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_s__link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_s__a_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_s__ta_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_s__ta_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_s__a_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_s__a_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_s__a_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_s__a_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - 
"file": "adcreatives_object_s__ta_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_s__ta_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_duckdb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_duckdb_names.json deleted file mode 100644 index 2bbb864cc4d87..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_duckdb_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - 
"schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": 
"adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mssql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mssql_names.json deleted file mode 100644 index 2bbb864cc4d87..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mssql_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": 
"adcreatives_object_st__nk_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": 
"schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "schema_test", - "table": 
"adcreatives_object_st__ta_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mysql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mysql_names.json deleted file mode 100644 index 2bbb864cc4d87..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mysql_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - 
"_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": 
"adcreatives_object_st__ta_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": 
"schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_names.json deleted file mode 100644 index 9b9c347db4a5a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": 
"_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_story_spec_link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": 
"adcreatives_object_story_spec_link_data_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", - "schema": "schema_test", - 
"table": "adcreatives_object_story_spec_link_data_app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_story_spec_link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_oracle_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_oracle_names.json deleted file mode 100644 index 995ced64a833c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_oracle_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - 
"schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_story_spec_link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops" - }, - 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - 
"schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_story_spec_link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" - }, - 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_postgres_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_postgres_names.json deleted file mode 100644 index 450b8a7f4bfc8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_postgres_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_s__nk_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__nk_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_s__app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__app_link_spec_android" - }, - 
"_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_s__ata_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ata_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_s__ta_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ta_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_s___app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s___app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_s__link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_s__a_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_s__ta_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ta_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_s__a_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_s__a_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_s__a_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_s__a_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__a_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_s__ta_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_s__ta_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - "schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - 
"schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_s__nk_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_s__nk_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_s__app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_s__app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_s__ata_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_s__ata_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_s__ta_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_s__ta_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_s___app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_s___app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_s__link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_s__link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_s__a_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_s__ta_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_s__ta_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_s__a_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_s__a_image_crops_400x150", - "schema": "schema_test", - 
"table": "adcreatives_object_s__a_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_s__a_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_s__a_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_s__a_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_s__ta_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_s__ta_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_tidb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_tidb_names.json deleted file mode 100644 index 2bbb864cc4d87..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_tidb_names.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "_airbyte_schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "_airbyte_schema_test", - "table": "adcreatives" - }, - "_airbyte_schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "_airbyte_schema_test", - "table": "adcreatives_adlabels" - }, - "_airbyte_schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - 
"_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": "adcreatives_object_st__ta_image_crops_100x72", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "_airbyte_schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - }, - "schema_test.adcreatives.adcreatives": { - "file": "adcreatives", - "schema": "schema_test", - "table": "adcreatives" - }, - "schema_test.adcreatives_adlabels.adlabels": { - "file": "adcreatives_adlabels", - "schema": "schema_test", - "table": "adcreatives_adlabels" - }, - 
"schema_test.adcreatives_image_crops.image_crops": { - "file": "adcreatives_image_crops", - "schema": "schema_test", - "table": "adcreatives_image_crops" - }, - "schema_test.adcreatives_image_crops_100x100.100x100": { - "file": "adcreatives_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x100" - }, - "schema_test.adcreatives_image_crops_100x72.100x72": { - "file": "adcreatives_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_image_crops_100x72" - }, - "schema_test.adcreatives_image_crops_191x100.191x100": { - "file": "adcreatives_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_image_crops_191x100" - }, - "schema_test.adcreatives_image_crops_400x150.400x150": { - "file": "adcreatives_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x150" - }, - "schema_test.adcreatives_image_crops_400x500.400x500": { - "file": "adcreatives_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_image_crops_400x500" - }, - "schema_test.adcreatives_image_crops_600x360.600x360": { - "file": "adcreatives_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_image_crops_600x360" - }, - "schema_test.adcreatives_image_crops_90x160.90x160": { - "file": "adcreatives_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_image_crops_90x160" - }, - "schema_test.adcreatives_object_story_spec.object_story_spec": { - "file": "adcreatives_object_story_spec", - "schema": "schema_test", - "table": "adcreatives_object_story_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data.link_data": { - "file": "adcreatives_object_story_spec_link_data", - "schema": "schema_test", - "table": "adcreatives_object_story_spec_link_data" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { - "file": "adcreatives_object_st__nk_data_app_link_spec", - "schema": "schema_test", - "table": "adcreatives_object_st__nk_data_app_link_spec" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { - "file": "adcreatives_object_st__app_link_spec_android", - "schema": "schema_test", - "table": "adcreatives_object_st__app_link_spec_android" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { - "file": "adcreatives_object_st__ata_app_link_spec_ios", - "schema": "schema_test", - "table": "adcreatives_object_st__ata_app_link_spec_ios" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { - "file": "adcreatives_object_st__ta_app_link_spec_ipad", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_app_link_spec_ipad" - }, - "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { - "file": "adcreatives_object_st___app_link_spec_iphone", - "schema": "schema_test", - "table": "adcreatives_object_st___app_link_spec_iphone" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { - "file": "adcreatives_object_st__link_data_image_crops", - "schema": "schema_test", - "table": "adcreatives_object_st__link_data_image_crops" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { - "file": "adcreatives_object_st__a_image_crops_100x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_100x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { - "file": 
"adcreatives_object_st__ta_image_crops_100x72", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_100x72" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { - "file": "adcreatives_object_st__a_image_crops_191x100", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_191x100" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { - "file": "adcreatives_object_st__a_image_crops_400x150", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x150" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { - "file": "adcreatives_object_st__a_image_crops_400x500", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_400x500" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { - "file": "adcreatives_object_st__a_image_crops_600x360", - "schema": "schema_test", - "table": "adcreatives_object_st__a_image_crops_600x360" - }, - "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { - "file": "adcreatives_object_st__ta_image_crops_90x160", - "schema": "schema_test", - "table": "adcreatives_object_st__ta_image_crops_90x160" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json deleted file mode 100644 index 336cf17d71941..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "simple stream name", - "namespace": "namespace", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "simple", - "namespace": "namespace", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - }, - "stream_name": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "simple_b94_stream_name", - "namespace": "other_namespace", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - }, - { - "stream": { - "name": "simple_b94_stream_name", - "namespace": "yet_another_namespace_with_a_very_long_name", - "json_schema": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - } - } - }, - "supported_sync_modes": ["incremental"], - "source_defined_cursor": true, - "default_cursor_field": [] - }, - "sync_mode": "incremental", - "cursor_field": [], - "destination_sync_mode": "append" - } - ] -} diff --git 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json deleted file mode 100644 index 047c8cb29a298..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - "schema": "_airbyte_namespace", - "table": "simple" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_names__e_b94_stream_name_f9d", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another__e_b94_stream_name_bae", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespac__e_b94_stream_name_5d1", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_duckdb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_duckdb_names.json deleted file mode 100644 index 0ae55a762fd8c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_duckdb_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - 
"schema": "_airbyte_namespace", - "table": "simple" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another___e_b94_stream_name_bae", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespace__e_b94_stream_name_5d1", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mssql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mssql_names.json deleted file mode 100644 index 0ae55a762fd8c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mssql_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - "schema": "_airbyte_namespace", - "table": "simple" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another___e_b94_stream_name_bae", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespace__e_b94_stream_name_5d1", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mysql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mysql_names.json deleted file mode 100644 index 0ae55a762fd8c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mysql_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - "schema": "_airbyte_namespace", - "table": "simple" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another___e_b94_stream_name_bae", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespace__e_b94_stream_name_5d1", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_names.json deleted file mode 100644 index ec95f346d6d74..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - "schema": "_airbyte_namespace", - "table": "simple" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_namespace_simple_b94_stream_name", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - 
"_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another_namespace_with_a_very_long_name_simple_b94_stream_name", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespace_with_a_very_long_name_simple_b94_stream_name", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_oracle_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_oracle_names.json deleted file mode 100644 index 397069ffdb961..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_oracle_names.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespace_with_a_very_long_name_simple_b94_stream_name", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_postgres_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_postgres_names.json deleted file mode 100644 index 047c8cb29a298..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_postgres_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - "schema": "_airbyte_namespace", - "table": "simple" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - 
"_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_names__e_b94_stream_name_f9d", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another__e_b94_stream_name_bae", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "yet_another_namespac__e_b94_stream_name_5d1", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_tidb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_tidb_names.json deleted file mode 100644 index 0ae55a762fd8c..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_tidb_names.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "_airbyte_namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "_airbyte_namespace", - "table": "simple_stream_name_f35" - }, - "namespace.simple stream name.simple stream name": { - "file": "simple_stream_name_f35", - "schema": "namespace", - "table": "simple_stream_name_f35" - }, - "_airbyte_namespace.simple_stream_name.stream_name": { - "file": "_airbyte_namespace_simple_b94_stream_name", - "schema": "_airbyte_namespace", - "table": "simple_b94_stream_name" - }, - "namespace.simple_stream_name.stream_name": { - "file": "namespace_simple_b94_stream_name", - "schema": "namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_namespace.simple.simple": { - "file": "simple", - "schema": "_airbyte_namespace", - "table": "simple" - }, - "namespace.simple.simple": { - "file": "simple", - "schema": "namespace", - "table": "simple" - }, - "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", - "schema": "_airbyte_other_namespace", - "table": "simple_b94_stream_name" - }, - "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { - "file": "other_namespace_simple_b94_stream_name", - "schema": "other_namespace", - "table": "simple_b94_stream_name" - }, - "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": "_airbyte_yet_another___e_b94_stream_name_bae", - "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - }, - "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { - "file": 
"yet_another_namespace__e_b94_stream_name_5d1", - "schema": "yet_another_namespace_with_a_very_long_name", - "table": "simple_b94_stream_name" - } -} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py deleted file mode 100644 index 22e590b29fab9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py +++ /dev/null @@ -1,251 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import os - -import pytest -from normalization.destination_type import DestinationType -from normalization.transform_catalog.destination_name_transformer import ( - DestinationNameTransformer, - strip_accents, - transform_standard_naming, -) - - -@pytest.fixture(scope="function", autouse=True) -def before_tests(request): - # This makes the test run whether it is executed from the tests folder (with pytest/gradle) - # or from the base-normalization folder (through pycharm) - unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") - if os.path.exists(unit_tests_dir): - os.chdir(unit_tests_dir) - else: - os.chdir(request.fspath.dirname) - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize( - "input_str, destination_type, expected", - [ - # Contains Space character - ("Hello World", "Postgres", True), - ("Hello World", "BigQuery", False), - ("Hello World", "Snowflake", True), - ("Hello World", "Redshift", True), - ("Hello World", "MySQL", True), - ("Hello World", "MSSQL", True), - ("Hello World", "TiDB", True), - ("Hello World", "DuckDB", True), - # Reserved Word for BigQuery and MySQL only - ("Groups", "Postgres", False), - ("Groups", "BigQuery", True), - ("Groups", "Snowflake", False), - ("Groups", "Redshift", False), - ("Groups", "MySQL", True), - ("Groups", "MSSQL", False), - ("Groups", "TiDB", True), - ("Groups", "DuckDB", True), - # Doesnt start with alpha or underscore - ("100x200", "Postgres", True), - ("100x200", "BigQuery", False), - ("100x200", "Snowflake", True), - ("100x200", "Redshift", True), - ("100x200", "MySQL", True), - ("100x200", "MSSQL", True), - ("100x200", "TiDB", True), - ("100x200", "DuckDB", True), - # Contains non alpha numeric - ("post.wall", "Postgres", True), - ("post.wall", "BigQuery", False), - ("post.wall", "Snowflake", True), - ("post.wall", "Redshift", True), - ("post.wall", "MySQL", True), - ("post.wall", "MSSQL", True), - ("post.wall", "TiDB", True), - ("post.wall", "DuckDB", True), - ], -) -def test_needs_quote(input_str: str, destination_type: str, expected: bool): - name_transformer = DestinationNameTransformer(DestinationType.from_string(destination_type)) - assert name_transformer.needs_quotes(input_str) == expected - - -@pytest.mark.parametrize( - "input_str, expected", - [ - ("Hello World!", "Hello World!"), - ("àêî öÙ", "aei oU"), - ], -) -def test_strip_accents(input_str: str, expected: str): - assert strip_accents(input_str) == expected - - -@pytest.mark.parametrize( - "expected, input_str", - [ - ("__identifier_name", "__identifier_name"), - ("IDENTIFIER_NAME", "IDENTIFIER_NAME"), - ("123identifier_name", "123identifier_name"), - ("i0d0e0n0t0i0f0i0e0r0n0a0m0e", "i0d0e0n0t0i0f0i0e0r0n0a0m0e"), - ("_identifier_name", ",identifier+name"), - ("identifier_name", "identifiêr name"), - ("a_unicode_name__", "a_unicode_name_文"), - ("identifier__name__", "identifier__name__"), - ("identifier_name_weee", 
"identifier-name.weee"), - ("_identifier_name_", '"identifier name"'), - ("identifier_name", "identifier name"), - ("identifier_", "identifier%"), - ("_identifier_", "`identifier`"), - ], -) -def test_transform_standard_naming(input_str: str, expected: str): - assert transform_standard_naming(input_str) == expected - - -@pytest.mark.parametrize( - "input_str, destination_type, expected, expected_in_jinja", - [ - # Case sensitive names - ("Identifier Name", "Postgres", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), - ("Identifier Name", "BigQuery", "Identifier_Name", "'Identifier_Name'"), - ("Identifier Name", "Snowflake", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), - ("Identifier Name", "Redshift", "{{ adapter.quote('identifier name') }}", "adapter.quote('identifier name')"), - ("Identifier Name", "MySQL", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), - ("Identifier Name", "MSSQL", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), - ("Identifier Name", "TiDB", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), - ("Identifier Name", "DuckDB", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), - # Reserved Word for BigQuery and MySQL only - ("Groups", "Postgres", "groups", "'groups'"), - ("Groups", "BigQuery", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), - ("Groups", "Snowflake", "GROUPS", "'GROUPS'"), - ("Groups", "Redshift", "groups", "'groups'"), - ("Groups", "MySQL", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), - ("Groups", "MSSQL", "groups", "'groups'"), - ("Groups", "TiDB", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), - ("Groups", "DuckDB", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), - ], -) -def test_normalize_column_name(input_str: str, destination_type: str, expected: str, expected_in_jinja: str): - t = DestinationType.from_string(destination_type) - assert DestinationNameTransformer(t).normalize_column_name(input_str, in_jinja=False) == expected - assert DestinationNameTransformer(t).normalize_column_name(input_str, in_jinja=True) == expected_in_jinja - - -@pytest.mark.parametrize( - "input_str, expected", - [ - # below the limit - ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh", "Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh"), - # at the limit - ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iii", "Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iii"), - # over the limit - ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii", "Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii"), - ("Aaaa_Bbbb_Cccc_Dddd_Eeee_a_very_long_name_Ffff_Gggg_Hhhh_Iiii", "Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii"), - ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii_Jjjj_Kkkk", "Aaaa_Bbbb_Cccc_Dddd___g_Hhhh_Iiii_Jjjj_Kkkk"), - ("ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz_0123456789", "ABCDEFGHIJKLMNOPQRST__qrstuvwxyz_0123456789"), - ], -) -def test_truncate_identifier(input_str: str, expected: str): - """ - Rules about truncations, for example for both of these strings which are too long for the postgres 64 limit: - - `Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii` - - `Aaaa_Bbbb_Cccc_Dddd_Eeee_a_very_long_name_Ffff_Gggg_Hhhh_Iiii` - - Deciding on how to truncate (in the middle) are being verified in these tests. - In this instance, both strings ends up as:`Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii` - and can potentially cause a collision in table names. 
- - Note that dealing with such collisions is not part of `destination_name_transformer` but of the `stream_processor`. - """ - name_transformer = DestinationNameTransformer(DestinationType.POSTGRES) - print(f"Truncating from #{len(input_str)} to #{len(expected)}") - assert name_transformer.truncate_identifier_name(input_str) == expected - - -@pytest.mark.parametrize( - "input_str, destination_type, expected, expected_column", - [ - # Case sensitive names - ("Identifier Name1", "Postgres", "identifier_name1", "{{ adapter.quote('Identifier Name1') }}"), - ("Identifier Name2", "BigQuery", "Identifier_Name2", "Identifier_Name2"), - ("Identifier Name3", "Snowflake", "IDENTIFIER_NAME3", "{{ adapter.quote('Identifier Name3') }}"), - ("Identifier Name4", "Redshift", "identifier_name4", "{{ adapter.quote('identifier name4') }}"), - ("Identifier Name5", "MySQL", "identifier_name5", "{{ adapter.quote('Identifier Name5') }}"), - ("Identifier Name6", "MSSQL", "identifier_name6", "{{ adapter.quote('Identifier Name6') }}"), - ("Identifier Name7", "TiDB", "identifier_name7", "{{ adapter.quote('Identifier Name7') }}"), - ("Identifier Name8", "DuckDB", "identifier_name8", "{{ adapter.quote('Identifier Name8') }}"), - # Unicode - ("a-Unicode_name_文1", "Postgres", "a_unicode_name__1", "{{ adapter.quote('a-Unicode_name_文1') }}"), - ("a-Unicode_name_文2", "BigQuery", "a_Unicode_name__2", "a_Unicode_name__2"), - ("a-Unicode_name_文3", "Snowflake", "A_UNICODE_NAME__3", "{{ adapter.quote('a-Unicode_name_文3') }}"), - ("a-Unicode_name_文4", "Redshift", "a_unicode_name__4", "{{ adapter.quote('a-unicode_name_文4') }}"), - ("a-Unicode_name_文5", "MySQL", "a_unicode_name__5", "{{ adapter.quote('a-Unicode_name_文5') }}"), - ("a-Unicode_name_文6", "MSSQL", "a_unicode_name__6", "{{ adapter.quote('a-Unicode_name_文6') }}"), - ("a-Unicode_name_文7", "TiDB", "a_unicode_name__7", "{{ adapter.quote('a-Unicode_name_文7') }}"), - ("a-Unicode_name_文8", "DuckDB", "a_unicode_name__8", "{{ adapter.quote('a-Unicode_name_文8') }}"), - # Doesnt start with alpha or underscore - ("100x2001", "Postgres", "100x2001", "{{ adapter.quote('100x2001') }}"), - ("100x2002", "BigQuery", "100x2002", "_100x2002"), - ("文2_a-Unicode_name", "BigQuery", "_2_a_Unicode_name", "_2_a_Unicode_name"), - ("100x2003", "Snowflake", "100x2003", "{{ adapter.quote('100x2003') }}"), - ("100x2004", "Redshift", "100x2004", "{{ adapter.quote('100x2004') }}"), - ("100x2005", "MySQL", "100x2005", "{{ adapter.quote('100x2005') }}"), - ("100x2006", "MSSQL", "_100x2006", "{{ adapter.quote('100x2006') }}"), - ("100x2007", "TiDB", "100x2007", "{{ adapter.quote('100x2007') }}"), - ("100x2008", "DuckDB", "100x2008", "{{ adapter.quote('100x2008') }}"), - # Reserved Keywords in BQ and MySQL - ("Groups", "Postgres", "groups", "groups"), - ("Groups", "BigQuery", "Groups", "{{ adapter.quote('Groups') }}"), - ("Groups", "Snowflake", "GROUPS", "GROUPS"), - ("Groups", "Redshift", "groups", "groups"), - ("Groups", "MySQL", "Groups", "{{ adapter.quote('Groups') }}"), - ("Groups", "MSSQL", "groups", "groups"), - ("Groups", "TiDB", "Groups", "{{ adapter.quote('Groups') }}"), - ("Groups", "DuckDB", "Groups", "{{ adapter.quote('Groups') }}"), - # Reserved Keywords - ("DisTincT", "Postgres", "DisTincT", "{{ adapter.quote('DisTincT') }}"), - ("DisTincT", "BigQuery", "DisTincT", "{{ adapter.quote('DisTincT') }}"), - ("DisTincT", "Snowflake", "DisTincT", "{{ adapter.quote('DisTincT') }}"), - ("DisTincT", "Redshift", "distinct", "{{ adapter.quote('distinct') }}"), - ("DisTincT", "MySQL", "DisTincT", "{{ 
adapter.quote('DisTincT') }}"), - ("DisTincT", "MSSQL", "DisTincT", "{{ adapter.quote('DisTincT') }}"), - ("DisTincT", "TiDB", "DisTincT", "{{ adapter.quote('DisTincT') }}"), - ("DisTincT", "DuckDB", "DisTincT", "{{ adapter.quote('DisTincT') }}"), - # Quoted identifiers - ("'QuoTed1 IdenTifiER'", "Postgres", "_quoted1_identifier_", "{{ adapter.quote('\\'QuoTed1 IdenTifiER\\'') }}"), - ("'QuoTed2 IdenTifiER'", "BigQuery", "_QuoTed2_IdenTifiER_", "_QuoTed2_IdenTifiER_"), - ("'QuoTed3 IdenTifiER'", "Snowflake", "_QUOTED3_IDENTIFIER_", "{{ adapter.quote('\\'QuoTed3 IdenTifiER\\'') }}"), - ("'QuoTed4 IdenTifiER'", "Redshift", "_quoted4_identifier_", "{{ adapter.quote('\\'quoted4 identifier\\'') }}"), - ("'QuoTed5 IdenTifiER'", "MySQL", "_quoted5_identifier_", "{{ adapter.quote('\\'QuoTed5 IdenTifiER\\'') }}"), - ("'QuoTed6 IdenTifiER'", "MSSQL", "_quoted6_identifier_", "{{ adapter.quote('\\'QuoTed6 IdenTifiER\\'') }}"), - ("'QuoTed7 IdenTifiER'", "TiDB", "_quoted7_identifier_", "{{ adapter.quote('\\'QuoTed7 IdenTifiER\\'') }}"), - ("'QuoTed8 IdenTifiER'", "DuckDB", "_quoted8_identifier_", "{{ adapter.quote('\\'QuoTed8 IdenTifiER\\'') }}"), - # Double Quoted identifiers - ('"QuoTed7 IdenTifiER"', "Postgres", "_quoted7_identifier_", '{{ adapter.quote(\'""QuoTed7 IdenTifiER""\') }}'), - ('"QuoTed8 IdenTifiER"', "BigQuery", "_QuoTed8_IdenTifiER_", "_QuoTed8_IdenTifiER_"), - ('"QuoTed9 IdenTifiER"', "Snowflake", "_QUOTED9_IDENTIFIER_", '{{ adapter.quote(\'""QuoTed9 IdenTifiER""\') }}'), - ('"QuoTed10 IdenTifiER"', "Redshift", "_quoted10_identifier_", '{{ adapter.quote(\'""quoted10 identifier""\') }}'), - ('"QuoTed11 IdenTifiER"', "MySQL", "_quoted11_identifier_", "{{ adapter.quote('\"QuoTed11 IdenTifiER\"') }}"), - ('"QuoTed12 IdenTifiER"', "MSSQL", "_quoted12_identifier_", '{{ adapter.quote(\'""QuoTed12 IdenTifiER""\') }}'), - ('"QuoTed13 IdenTifiER"', "TiDB", "_quoted13_identifier_", "{{ adapter.quote('\"QuoTed13 IdenTifiER\"') }}"), - ('"QuoTed14 IdenTifiER"', "DuckDB", "_quoted14_identifier_", "{{ adapter.quote('\"QuoTed14 IdenTifiER\"') }}"), - # Back Quoted identifiers - ("`QuoTed13 IdenTifiER`", "Postgres", "_quoted13_identifier_", "{{ adapter.quote('`QuoTed13 IdenTifiER`') }}"), - ("`QuoTed14 IdenTifiER`", "BigQuery", "_QuoTed14_IdenTifiER_", "_QuoTed14_IdenTifiER_"), - ("`QuoTed15 IdenTifiER`", "Snowflake", "_QUOTED15_IDENTIFIER_", "{{ adapter.quote('`QuoTed15 IdenTifiER`') }}"), - ("`QuoTed16 IdenTifiER`", "Redshift", "_quoted16_identifier_", "{{ adapter.quote('`quoted16 identifier`') }}"), - ("`QuoTed17 IdenTifiER`", "MySQL", "_quoted17_identifier_", "{{ adapter.quote('_QuoTed17 IdenTifiER_') }}"), - ("`QuoTed18 IdenTifiER`", "MSSQL", "_quoted18_identifier_", "{{ adapter.quote('`QuoTed18 IdenTifiER`') }}"), - ("`QuoTed17 IdenTifiER`", "TiDB", "_quoted17_identifier_", "{{ adapter.quote('_QuoTed17 IdenTifiER_') }}"), - ("`QuoTed19 IdenTifiER`", "DuckDB", "_quoted19_identifier_", "{{ adapter.quote('_QuoTed19 IdenTifiER_') }}"), - ], -) -def test_normalize_name(input_str: str, destination_type: str, expected: str, expected_column: str): - t = DestinationType.from_string(destination_type) - assert DestinationNameTransformer(t).normalize_schema_name(input_str) == expected - assert DestinationNameTransformer(t).normalize_table_name(input_str) == expected - assert DestinationNameTransformer(t).normalize_column_name(input_str) == expected_column diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py 
b/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py deleted file mode 100644 index 7251d1bb54c24..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py +++ /dev/null @@ -1,105 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import os -from typing import List - -import pytest -from airbyte_cdk.models import DestinationSyncMode, SyncMode -from normalization.destination_type import DestinationType -from normalization.transform_catalog.stream_processor import StreamProcessor -from normalization.transform_catalog.table_name_registry import TableNameRegistry - - -@pytest.fixture(scope="function", autouse=True) -def before_tests(request): - # This makes the test run whether it is executed from the tests folder (with pytest/gradle) - # or from the base-normalization folder (through pycharm) - unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") - if os.path.exists(unit_tests_dir): - os.chdir(unit_tests_dir) - else: - os.chdir(request.fspath.dirname) - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize( - "cursor_field, expecting_exception, expected_cursor_field", - [ - (None, False, "_airbyte_emitted_at"), - (["updated_at"], False, "updated_at"), - (["_airbyte_emitted_at"], False, "_airbyte_emitted_at"), - (["parent", "nested_field"], True, "nested_field"), - ], -) -def test_cursor_field(cursor_field: List[str], expecting_exception: bool, expected_cursor_field: str): - stream_processor = StreamProcessor.create( - stream_name="test_cursor_field", - destination_type=DestinationType.POSTGRES, - default_schema="default_schema", - raw_schema="raw_schema", - schema="schema_name", - source_sync_mode=SyncMode.incremental, - destination_sync_mode=DestinationSyncMode.append_dedup, - cursor_field=cursor_field, - primary_key=[], - json_column_name="json_column_name", - properties=dict(), - tables_registry=TableNameRegistry(DestinationType.POSTGRES), - from_table="", - ) - try: - assert ( - stream_processor.get_cursor_field(column_names={expected_cursor_field: (expected_cursor_field, "random")}) - == expected_cursor_field - ) - except ValueError as e: - if not expecting_exception: - raise e - - -@pytest.mark.parametrize( - "primary_key, column_type, expecting_exception, expected_primary_keys, expected_final_primary_key_string", - [ - ([["id"]], "string", False, ["id"], "{{ adapter.quote('id') }}"), - ([["id"]], "number", False, ["id"], "cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }})"), - ([["first_name"], ["last_name"]], "string", False, ["first_name", "last_name"], "first_name, last_name"), - ([["float_id"]], "number", False, ["float_id"], "cast(float_id as {{ dbt_utils.type_string() }})"), - ([["_airbyte_emitted_at"]], "string", False, [], "cast(_airbyte_emitted_at as {{ dbt_utils.type_string() }})"), - (None, "string", True, [], ""), - ([["parent", "nested_field"]], "string", True, [], ""), - ], -) -def test_primary_key( - primary_key: List[List[str]], - column_type: str, - expecting_exception: bool, - expected_primary_keys: List[str], - expected_final_primary_key_string: str, -): - stream_processor = StreamProcessor.create( - stream_name="test_primary_key", - destination_type=DestinationType.POSTGRES, - raw_schema="raw_schema", - default_schema="default_schema", - schema="schema_name", - source_sync_mode=SyncMode.incremental, - destination_sync_mode=DestinationSyncMode.append_dedup, - cursor_field=[], - primary_key=primary_key, - 
json_column_name="json_column_name", - properties={key: {"type": column_type} for key in expected_primary_keys}, - tables_registry=TableNameRegistry(DestinationType.POSTGRES), - from_table="", - ) - try: - assert ( - ", ".join(stream_processor.get_primary_key_partition(column_names=stream_processor.extract_column_names())) - == expected_final_primary_key_string - ) - except ValueError as e: - if not expecting_exception: - raise e diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_table_name_registry.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_table_name_registry.py deleted file mode 100644 index cd645850f6991..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/test_table_name_registry.py +++ /dev/null @@ -1,186 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -import json -import os -from typing import List - -import pytest -from normalization.destination_type import DestinationType -from normalization.transform_catalog.catalog_processor import CatalogProcessor -from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer -from normalization.transform_catalog.table_name_registry import TableNameRegistry, get_nested_hashed_table_name - - -@pytest.fixture(scope="function", autouse=True) -def before_tests(request): - # This makes the test run whether it is executed from the tests folder (with pytest/gradle) - # or from the base-normalization folder (through pycharm) - unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") - if os.path.exists(unit_tests_dir): - os.chdir(unit_tests_dir) - else: - os.chdir(request.fspath.dirname) - yield - os.chdir(request.config.invocation_dir) - - -@pytest.mark.parametrize( - "catalog_file", - [ - "long_name_truncate_collisions_catalog", # collisions are generated on postgres because of character limits - "un-nesting_collisions_catalog", # collisions between top-level streams and nested ones - "nested_catalog", # sample catalog from facebook - ], -) -@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) -def test_resolve_names(destination_type: DestinationType, catalog_file: str): - """ - For a given catalog.json and destination, multiple cases can occur where naming becomes tricky. - (especially since some destination like postgres have a very low limit to identifiers length of 64 characters) - - In case of nested objects/arrays in a stream, names can drag on to very long names. - Tests are built here using resources files as follow: - - `_catalog.json`: - input catalog.json, typically as what source would provide. - For example Hubspot, Stripe and Facebook catalog.json contains some level of nesting. - (here, nested_catalog.json is an extracted smaller sample of stream/properties from the facebook catalog) - - `_expected_names.json`: - list of expected table names - - For the expected json files, it is possible to specialize the file to a certain destination. 
- So if for example, the resources folder contains these two expected files: - - edge_cases_catalog_expected_names.json - - edge_cases_catalog_expected_postgres_names.json - Then the test will be using the first edge_cases_catalog_expected_names.json except for - Postgres destination where the expected table names will come from edge_cases_catalog_expected_postgres_names.json - - The content of the expected_*.json files are the serialization of the stream_processor.tables_registry.registry - """ - integration_type = destination_type.value - tables_registry = TableNameRegistry(destination_type) - - catalog = read_json(f"resources/{catalog_file}.json") - - # process top level - stream_processors = CatalogProcessor.build_stream_processor( - catalog=catalog, - json_column_name="'json_column_name_test'", - default_schema="schema_test", - name_transformer=DestinationNameTransformer(destination_type), - destination_type=destination_type, - tables_registry=tables_registry, - ) - for stream_processor in stream_processors: - # Check properties - if not stream_processor.properties: - raise EOFError("Invalid Catalog: Unexpected empty properties in catalog") - stream_processor.collect_table_names() - for conflict in tables_registry.resolve_names(): - print( - f"WARN: Resolving conflict: {conflict.schema}.{conflict.table_name_conflict} " - f"from '{'.'.join(conflict.json_path)}' into {conflict.table_name_resolved}" - ) - apply_function = identity - if DestinationType.SNOWFLAKE.value == destination_type.value: - apply_function = str.upper - elif DestinationType.REDSHIFT.value == destination_type.value: - apply_function = str.lower - if os.path.exists(f"resources/{catalog_file}_expected_{integration_type.lower()}_names.json"): - expected_names = read_json(f"resources/{catalog_file}_expected_{integration_type.lower()}_names.json", apply_function) - else: - expected_names = read_json(f"resources/{catalog_file}_expected_names.json", apply_function) - - assert tables_registry.to_dict(apply_function) == expected_names - - -def identity(x): - return x - - -def read_json(input_path: str, apply_function=(lambda x: x)): - with open(input_path, "r") as file: - contents = file.read() - if apply_function: - contents = apply_function(contents) - return json.loads(contents) - - -# This test is not intended to be exhaustive over the destinations, -# so it's not mandatory to add new destination expected field here. -# The intent here is to unit test simple_name vs nested_hashed_name -# functions in the table_name_registry. There are other tests that -# automatically test naming against all destinations whenever it is -# added to the enum. 
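The comment above contrasts the two naming strategies under test. As a rough illustration, `get_simple_table_name` can be thought of as joining the json path and middle-truncating the result; the normalization steps in this sketch are assumptions, but it reproduces the Postgres fixtures just below, such as `the_parent_stream_ha___short_substream_name`:

    def get_simple_table_name(json_path: list, limit: int = 43) -> str:
        # Join the json path into one readable identifier, normalize it, then
        # middle-truncate to the destination's identifier budget (sketch only;
        # the real logic lived in table_name_registry.py).
        name = "_".join(json_path).lower().replace(" ", "_").replace("-", "_")
        if len(name) <= limit:
            return name
        head = (limit - 2) // 2
        tail = limit - 2 - head
        return name[:head] + "__" + name[-tail:]

The nested-hashed variant instead splices a 3-character digest of the path into the name (the `30c` in `parent_30c_child` below), so two long paths that share a head and tail can no longer truncate to the same identifier; the digest input is not shown in this diff, so it is left out of the sketch.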
-@pytest.mark.parametrize( - "json_path, expected_postgres, expected_bigquery", - [ - ( - ["parent", "child"], - "parent_child", - "parent_child", - ), - ( - ["The parent stream has a nested column with a", "short_substream_name"], - "the_parent_stream_ha___short_substream_name", - "The_parent_stream_has_a_nested_column_with_a_short_substream_name", - ), - ( - ["The parent stream has a nested column with a", "substream with a rather long name"], - "the_parent_stream_ha__th_a_rather_long_name", - "The_parent_stream_has_a_nested_column_with_a_substream_with_a_rather_long_name", - ), - ], -) -def test_get_simple_table_name(json_path: List[str], expected_postgres: str, expected_bigquery: str): - """ - Checks how to generate a simple, easy-to-understand name from a json path - """ - postgres_registry = TableNameRegistry(DestinationType.POSTGRES) - actual_postgres_name = postgres_registry.get_simple_table_name(json_path) - assert actual_postgres_name == expected_postgres - assert len(actual_postgres_name) <= 43 # explicitly check for our max postgres length in case tests are changed in the future - - bigquery_registry = TableNameRegistry(DestinationType.BIGQUERY) - actual_bigquery_name = bigquery_registry.get_simple_table_name(json_path) - assert actual_bigquery_name == expected_bigquery - - -@pytest.mark.parametrize( - "json_path, expected_postgres, expected_bigquery", - [ - ( - ["parent", "child"], - "parent_30c_child", - "parent_30c_child", - ), - ( - ["The parent stream has a nested column with a", "short_substream_name"], - "the_parent_stream__cd9_short_substream_name", - "The_parent_stream_has_a_nested_column_with_a_cd9_short_substream_name", - ), - ( - ["The parent stream has a nested column with a", "substream with a rather long name"], - "the_parent_0a5_substream_wi__her_long_name", - "The_parent_stream_has_a_nested_column_with_a_0a5_substream_with_a_rather_long_name", - ), - ], -) -def test_get_nested_hashed_table_name(json_path: List[str], expected_postgres: str, expected_bigquery: str): - """ - Checks how to generate a unique name by combining all fields into a single table name that the user can (somehow) - identify and recognize as the data available in there. - A set of complicated rules is applied to choose which parts to truncate, which to keep, and how to handle - name collisions. - """ - child = json_path[-1] - postgres_name_transformer = DestinationNameTransformer(DestinationType.POSTGRES) - actual_postgres_name = get_nested_hashed_table_name(postgres_name_transformer, "schema", json_path, child) - assert actual_postgres_name == expected_postgres - assert len(actual_postgres_name) <= 43 # explicitly check for our max postgres length in case tests are changed in the future - - bigquery_name_transformer = DestinationNameTransformer(DestinationType.BIGQUERY) - actual_bigquery_name = get_nested_hashed_table_name(bigquery_name_transformer, "schema", json_path, child) - assert actual_bigquery_name == expected_bigquery diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_transform_config.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_transform_config.py deleted file mode 100644 index 2c3fc60f7ea3e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/unit_tests/test_transform_config.py +++ /dev/null @@ -1,595 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - - -import json -import os -import socket -import tempfile -import time - -import pytest -from normalization.destination_type import DestinationType -from normalization.transform_catalog.transform import extract_path, extract_schema -from normalization.transform_config.transform import TransformConfig - - -class TestTransformConfig: - """ - This class is testing the transform config functionality that converts a destination_config.json into the adequate profiles.yml file for dbt to use - """ - - @pytest.fixture(scope="class", autouse=True) - def before_all_tests(self, request): - # This makes the test run whether it is executed from the tests folder (with pytest/gradle) - # or from the base-normalization folder (through pycharm) - unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") - if os.path.exists(unit_tests_dir): - os.chdir(unit_tests_dir) - else: - os.chdir(request.fspath.dirname) - yield - os.chdir(request.config.invocation_dir) - - def test_is_ssh_tunnelling(self): - def single_test(config, expected_output): - assert TransformConfig.is_ssh_tunnelling(config) == expected_output - - inputs = [ - ({}, False), - ( - { - "type": "postgres", - "dbname": "my_db", - "host": "airbyte.io", - "pass": "password123", - "port": 5432, - "schema": "public", - "threads": 32, - "user": "a user", - }, - False, - ), - ( - { - "type": "postgres", - "dbname": "my_db", - "host": "airbyte.io", - "pass": "password123", - "port": 5432, - "schema": "public", - "threads": 32, - "user": "a user", - "tunnel_method": { - "tunnel_host": "1.2.3.4", - "tunnel_method": "SSH_PASSWORD_AUTH", - "tunnel_port": 22, - "tunnel_user": "user", - "tunnel_user_password": "pass", - }, - }, - True, - ), - ( - { - "type": "postgres", - "dbname": "my_db", - "host": "airbyte.io", - "pass": "password123", - "port": 5432, - "schema": "public", - "threads": 32, - "user": "a user", - "tunnel_method": { - "tunnel_method": "SSH_KEY_AUTH", - }, - }, - True, - ), - ( - { - "type": "postgres", - "dbname": "my_db", - "host": "airbyte.io", - "pass": "password123", - "port": 5432, - "schema": "public", - "threads": 32, - "user": "a user", - "tunnel_method": { - "nothing": "nothing", - }, - }, - False, - ), - ] - for input_tuple in inputs: - single_test(input_tuple[0], input_tuple[1]) - - def test_is_port_free(self): - # to test that this accurately identifies 'free' ports, we'll find a 'free' port and then try to use it - test_port = 13055 - while not TransformConfig.is_port_free(test_port): - test_port += 1 - if test_port > 65535: - raise RuntimeError("couldn't find a free port...") - - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("localhost", test_port)) - # if we haven't failed then we accurately identified a 'free' port. 
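A plausible shape for the `is_port_free` helper this test exercises, assuming it simply attempts to bind the port (the actual implementation lived in normalization/transform_config/transform.py and may differ):

    import socket

    def is_port_free(port: int) -> bool:
        # If the bind succeeds, nothing was listening and the port was free;
        # an OSError (address already in use) means it is taken.
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(("localhost", port))
            return True
        except OSError:
            return False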
- # now we can test for accurate identification of 'in-use' port since we're using it - assert TransformConfig.is_port_free(test_port) is False - - # and just for good measure now that our context manager is closed (and port open again) - time.sleep(1) - assert TransformConfig.is_port_free(test_port) is True - - def test_pick_a_port(self): - supposedly_open_port = TransformConfig.pick_a_port() - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("localhost", supposedly_open_port)) - - def test_transform_bigquery(self): - input = { - "project_id": "my_project_id", - "dataset_id": "my_dataset_id", - "credentials_json": '{ "type": "service_account-json" }', - "transformation_priority": "interactive", - "dataset_location": "EU", - } - - actual_output = TransformConfig().transform_bigquery(input) - expected_output = { - "type": "bigquery", - "method": "service-account-json", - "project": "my_project_id", - "dataset": "my_dataset_id", - "priority": "interactive", - "keyfile_json": {"type": "service_account-json"}, - "location": "EU", - "retries": 3, - "threads": 8, - } - - actual_keyfile = actual_output["keyfile_json"] - expected_keyfile = {"type": "service_account-json"} - assert actual_output == expected_output - assert actual_keyfile == expected_keyfile - assert extract_schema(actual_output) == "my_dataset_id" - - def test_transform_bigquery_no_credentials(self): - input = {"project_id": "my_project_id", "dataset_id": "my_dataset_id"} - - actual_output = TransformConfig().transform_bigquery(input) - expected_output = { - "type": "bigquery", - "method": "oauth", - "project": "my_project_id", - "dataset": "my_dataset_id", - "priority": "interactive", - "retries": 3, - "threads": 8, - } - - assert actual_output == expected_output - assert extract_schema(actual_output) == "my_dataset_id" - - def test_transform_bigquery_with_embedded_project_id(self): - input = {"project_id": "my_project_id", "dataset_id": "my_project_id:my_dataset_id"} - - actual_output = TransformConfig().transform_bigquery(input) - expected_output = { - "type": "bigquery", - "method": "oauth", - "project": "my_project_id", - "dataset": "my_dataset_id", - "priority": "interactive", - "retries": 3, - "threads": 8, - } - - assert actual_output == expected_output - assert extract_schema(actual_output) == "my_dataset_id" - - def test_transform_bigquery_with_embedded_mismatched_project_id(self): - input = {"project_id": "my_project_id", "dataset_id": "bad_project_id:my_dataset_id"} - - try: - TransformConfig().transform_bigquery(input) - assert False, "transform_bigquery should have raised an exception" - except ValueError: - pass - - def test_transform_bigquery_with_invalid_format(self): - input = {"project_id": "my_project_id", "dataset_id": "foo:bar:baz"} - - try: - TransformConfig().transform_bigquery(input) - assert False, "transform_bigquery should have raised an exception" - except ValueError: - pass - - def test_transform_postgres(self): - input = { - "host": "airbyte.io", - "port": 5432, - "username": "a user", - "password": "password123", - "database": "my_db", - "schema": "public", - } - - actual = TransformConfig().transform_postgres(input) - expected = { - "type": "postgres", - "dbname": "my_db", - "host": "airbyte.io", - "pass": "password123", - "port": 5432, - "schema": "public", - "threads": 8, - "user": "a user", - } - - assert actual == expected - assert extract_schema(actual) == "public" - - def test_transform_postgres_ssh(self): - input = { - "host": "airbyte.io", - "port": 5432, - "username": 
"a user", - "password": "password123", - "database": "my_db", - "schema": "public", - "tunnel_method": { - "tunnel_host": "1.2.3.4", - "tunnel_method": "SSH_PASSWORD_AUTH", - "tunnel_port": 22, - "tunnel_user": "user", - "tunnel_user_password": "pass", - }, - } - port = TransformConfig.pick_a_port() - - actual = TransformConfig().transform_postgres(input) - expected = { - "type": "postgres", - "dbname": "my_db", - "host": "localhost", - "pass": "password123", - "port": port, - "schema": "public", - "threads": 8, - "user": "a user", - } - - assert actual == expected - assert extract_schema(actual) == "public" - - def test_transform_snowflake(self): - input = { - "host": "http://123abc.us-east-7.aws.snowflakecomputing.com", - "role": "AIRBYTE_ROLE", - "warehouse": "AIRBYTE_WAREHOUSE", - "database": "AIRBYTE_DATABASE", - "schema": "AIRBYTE_SCHEMA", - "username": "AIRBYTE_USER", - "password": "password123", - } - - actual = TransformConfig().transform_snowflake(input) - expected = { - "account": "123abc.us-east-7.aws", - "client_session_keep_alive": False, - "database": "AIRBYTE_DATABASE", - "password": "password123", - "query_tag": "normalization", - "role": "AIRBYTE_ROLE", - "schema": "AIRBYTE_SCHEMA", - "threads": 5, - "retry_all": True, - "retry_on_database_errors": True, - "connect_retries": 3, - "connect_timeout": 15, - "type": "snowflake", - "user": "AIRBYTE_USER", - "warehouse": "AIRBYTE_WAREHOUSE", - } - - assert actual == expected - assert extract_schema(actual) == "AIRBYTE_SCHEMA" - - def test_transform_snowflake_oauth(self): - - input = { - "host": "http://123abc.us-east-7.aws.snowflakecomputing.com", - "role": "AIRBYTE_ROLE", - "warehouse": "AIRBYTE_WAREHOUSE", - "database": "AIRBYTE_DATABASE", - "schema": "AIRBYTE_SCHEMA", - "username": "AIRBYTE_USER", - "credentials": { - "auth_type": "OAuth2.0", - "client_id": "AIRBYTE_CLIENT_ID", - "access_token": "AIRBYTE_ACCESS_TOKEN", - "client_secret": "AIRBYTE_CLIENT_SECRET", - "refresh_token": "AIRBYTE_REFRESH_TOKEN", - }, - } - - actual = TransformConfig().transform_snowflake(input) - expected = { - "account": "123abc.us-east-7.aws", - "client_session_keep_alive": False, - "database": "AIRBYTE_DATABASE", - "query_tag": "normalization", - "role": "AIRBYTE_ROLE", - "schema": "AIRBYTE_SCHEMA", - "threads": 5, - "retry_all": True, - "retry_on_database_errors": True, - "connect_retries": 3, - "connect_timeout": 15, - "type": "snowflake", - "user": "AIRBYTE_USER", - "warehouse": "AIRBYTE_WAREHOUSE", - "authenticator": "oauth", - "oauth_client_id": "AIRBYTE_CLIENT_ID", - "oauth_client_secret": "AIRBYTE_CLIENT_SECRET", - "token": "AIRBYTE_REFRESH_TOKEN", - } - - assert actual == expected - assert extract_schema(actual) == "AIRBYTE_SCHEMA" - - def test_transform_snowflake_key_pair(self): - - input = { - "host": "http://123abc.us-east-7.aws.snowflakecomputing.com", - "role": "AIRBYTE_ROLE", - "warehouse": "AIRBYTE_WAREHOUSE", - "database": "AIRBYTE_DATABASE", - "schema": "AIRBYTE_SCHEMA", - "username": "AIRBYTE_USER", - "credentials": { - "private_key": "AIRBYTE_PRIVATE_KEY", - "private_key_password": "AIRBYTE_PRIVATE_KEY_PASSWORD", - }, - } - - actual = TransformConfig().transform_snowflake(input) - expected = { - "account": "123abc.us-east-7.aws", - "client_session_keep_alive": False, - "database": "AIRBYTE_DATABASE", - "query_tag": "normalization", - "role": "AIRBYTE_ROLE", - "schema": "AIRBYTE_SCHEMA", - "threads": 5, - "retry_all": True, - "retry_on_database_errors": True, - "connect_retries": 3, - "connect_timeout": 15, - "type": 
"snowflake", - "user": "AIRBYTE_USER", - "warehouse": "AIRBYTE_WAREHOUSE", - "private_key_path": "private_key_path.txt", - "private_key_passphrase": "AIRBYTE_PRIVATE_KEY_PASSWORD", - } - - assert actual == expected - assert extract_schema(actual) == "AIRBYTE_SCHEMA" - - def test_transform_mysql(self): - input = { - "type": "mysql5", - "host": "airbyte.io", - "port": 5432, - "database": "my_db", - "schema": "public", - "username": "a user", - "password": "password1234", - } - - actual = TransformConfig().transform_mysql(input) - expected = { - "type": "mysql5", - "server": "airbyte.io", - "port": 5432, - "schema": "my_db", - "database": "my_db", - "username": "a user", - "password": "password1234", - } - - assert actual == expected - # DBT schema is equivalent to MySQL database - assert extract_schema(actual) == "my_db" - - def test_transform_mssql(self): - input = { - "type": "sqlserver", - "host": "airbyte.io", - "port": 1433, - "database": "my_db", - "schema": "my_db", - "username": "SA", - "password": "password1234", - } - - actual = TransformConfig().transform_mysql(input) - expected = { - "type": "sqlserver", - "server": "airbyte.io", - "port": 1433, - "schema": "my_db", - "database": "my_db", - "username": "SA", - "password": "password1234", - } - - assert actual == expected - # DBT schema is equivalent to MySQL database - assert extract_schema(actual) == "my_db" - - def test_transform_clickhouse(self): - input = {"host": "airbyte.io", "port": 9440, "database": "default", "username": "ch", "password": "password1234", "ssl": True} - - actual = TransformConfig().transform_clickhouse(input) - expected = { - "type": "clickhouse", - "driver": "http", - "verify": False, - "host": "airbyte.io", - "port": 9440, - "schema": "default", - "user": "ch", - "password": "password1234", - "secure": True, - } - - assert actual == expected - assert extract_schema(actual) == "default" - - # test that the full config is produced. this overlaps slightly with the transform_postgres test. 
-    def test_transform(self):
-        input = {
-            "host": "airbyte.io",
-            "port": 5432,
-            "username": "a user",
-            "password": "password123",
-            "database": "my_db",
-            "schema": "public",
-        }
-
-        expected = self.get_base_config()
-        expected["normalize"]["outputs"]["prod"] = {
-            "type": "postgres",
-            "dbname": "my_db",
-            "host": "airbyte.io",
-            "pass": "password123",
-            "port": 5432,
-            "schema": "public",
-            "threads": 8,
-            "user": "a user",
-        }
-        actual = TransformConfig().transform(DestinationType.POSTGRES, input)
-
-        assert actual == expected
-        assert extract_schema(actual["normalize"]["outputs"]["prod"]) == "public"
-
-    def test_transform_tidb(self):
-        input = {
-            "type": "tidb",
-            "host": "airbyte.io",
-            "port": 5432,
-            "database": "ti_db",
-            "schema": "public",
-            "username": "a user",
-            "password": "password1234",
-        }
-
-        actual = TransformConfig().transform_tidb(input)
-        expected = {
-            "type": "tidb",
-            "server": "airbyte.io",
-            "port": 5432,
-            "schema": "ti_db",
-            "database": "ti_db",
-            "username": "a user",
-            "password": "password1234",
-        }
-
-        assert actual == expected
-        assert extract_schema(actual) == "ti_db"
-
-    def test_transform_duckdb_schema(self):
-        input = {
-            "type": "duckdb",
-            "destination_path": "/local/testing.duckdb",
-            "schema": "quackqauck",
-        }
-
-        actual = TransformConfig().transform_duckdb(input)
-        expected = {
-            "type": "duckdb",
-            "path": "/local/testing.duckdb",
-            "schema": "quackqauck",
-        }
-
-        assert actual == expected
-        assert extract_path(actual) == "/local/testing.duckdb"
-
-    def test_transform_duckdb_no_schema(self):
-        input = {
-            "type": "duckdb",
-            "destination_path": "/local/testing.duckdb",
-        }
-
-        actual = TransformConfig().transform_duckdb(input)
-        expected = {
-            "type": "duckdb",
-            "path": "/local/testing.duckdb",
-            "schema": "main",
-        }
-
-        assert actual == expected
-        assert extract_path(actual) == "/local/testing.duckdb"
-
-    def get_base_config(self):
-        return {
-            "config": {
-                "partial_parse": True,
-                "printer_width": 120,
-                "send_anonymous_usage_stats": False,
-                "use_colors": True,
-            },
-            "normalize": {"target": "prod", "outputs": {"prod": {}}},
-        }
-
-    def test_parse(self):
-        t = TransformConfig()
-        assert {"integration_type": DestinationType.POSTGRES, "config": "config.json", "output_path": "out.yml"} == t.parse(
-            ["--integration-type", "postgres", "--config", "config.json", "--out", "out.yml"]
-        )
-
-    def test_write_ssh_config(self):
-        original_config_input = {
-            "type": "postgres",
-            "dbname": "my_db",
-            "host": "airbyte.io",
-            "pass": "password123",
-            "port": 5432,
-            "schema": "public",
-            "threads": 32,
-            "user": "a user",
-            "tunnel_method": {
-                "tunnel_host": "1.2.3.4",
-                "tunnel_method": "SSH_PASSWORD_AUTH",
-                "tunnel_port": 22,
-                "tunnel_user": "user",
-                "tunnel_user_password": "pass",
-            },
-        }
-        transformed_config_input = self.get_base_config()
-        transformed_config_input["normalize"]["outputs"]["prod"] = {
-            "port": 7890,
-        }
-        expected = {
-            "db_host": "airbyte.io",
-            "db_port": 5432,
-            "tunnel_map": {
-                "tunnel_host": "1.2.3.4",
-                "tunnel_method": "SSH_PASSWORD_AUTH",
-                "tunnel_port": 22,
-                "tunnel_user": "user",
-                "tunnel_user_password": "pass",
-            },
-            "local_port": 7890,
-        }
-        # mkdtemp guarantees the directory exists for the duration of the test
-        tmp_path = tempfile.mkdtemp()
-        TransformConfig.write_ssh_config(tmp_path, original_config_input, transformed_config_input)
-        with open(os.path.join(tmp_path, "ssh.json"), "r") as f:
-            assert json.load(f) == expected
diff --git a/airbyte-integrations/bases/base/.dockerignore b/airbyte-integrations/bases/base/.dockerignore
deleted file mode 100644
index 378eac25d3117..0000000000000
--- a/airbyte-integrations/bases/base/.dockerignore
+++ /dev/null
@@ -1 +0,0 @@
-build
diff --git a/airbyte-integrations/bases/base/Dockerfile b/airbyte-integrations/bases/base/Dockerfile
deleted file mode 100644
index e03cdca90fc9a..0000000000000
--- a/airbyte-integrations/bases/base/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-### WARNING ###
-# The Java connector Dockerfiles will soon be deprecated.
-# This Dockerfile is not used to build the connector image we publish to DockerHub.
-# The new logic to build the connector image is declared with Dagger here:
-# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649
-
-# If you need to add custom logic to build your connector image, you can do so by adding a finalize_build.sh or finalize_build.py script in the connector folder.
-# Please reach out to the Connectors Operations team if you have any questions.
-FROM amazonlinux:2022.0.20220831.1
-
-WORKDIR /airbyte
-
-COPY base.sh .
-
-ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
-ENTRYPOINT ["/airbyte/base.sh"]
-
-LABEL io.airbyte.version=0.1.0
-LABEL io.airbyte.name=airbyte/integration-base
diff --git a/airbyte-integrations/bases/base/base.sh b/airbyte-integrations/bases/base/base.sh
deleted file mode 100755
index b72ad35e18eb0..0000000000000
--- a/airbyte-integrations/bases/base/base.sh
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-function echo2() {
-  echo >&2 "$@"
-}
-
-function error() {
-  echo2 "$@"
-  exit 1
-}
-
-# TODO: make it easy to select source or destination, and validate based on that selection, by adding an integration type env variable.
-function main() {
-  CMD="$1"
-  shift 1 || error "command not specified."
-
-  ARGS=
-  while [ $# -ne 0 ]; do
-    case "$1" in
-    --config)
-      CONFIG_FILE="$2"
-      shift 2
-      ;;
-    --catalog)
-      CATALOG_FILE="$2"
-      shift 2
-      ;;
-    --state)
-      STATE_FILE="$2"
-      shift 2
-      ;;
-    *)
-      error "Unknown option: $1"
-      ;;
-    esac
-  done
-
-  case "$CMD" in
-  spec)
-    eval "$AIRBYTE_SPEC_CMD"
-    ;;
-  check)
-    eval "$AIRBYTE_CHECK_CMD" --config "$CONFIG_FILE"
-    ;;
-  discover)
-    eval "$AIRBYTE_DISCOVER_CMD" --config "$CONFIG_FILE"
-    ;;
-  read)
-    READ_STATEMENT="$AIRBYTE_READ_CMD --config $CONFIG_FILE --catalog $CATALOG_FILE"
-    if [[ ! -z "$STATE_FILE" ]]; then READ_STATEMENT="$READ_STATEMENT --state $STATE_FILE"; fi
-    eval "$READ_STATEMENT"
-    ;;
-  write)
-    eval "$AIRBYTE_WRITE_CMD" --config "$CONFIG_FILE" --catalog "$CATALOG_FILE"
-    ;;
-  *)
-    error "Unknown command: $CMD"
-    ;;
-  esac
-}
-
-main "$@"
diff --git a/airbyte-integrations/bases/base/build.gradle b/airbyte-integrations/bases/base/build.gradle
deleted file mode 100644
index 0c2de175e2cc9..0000000000000
--- a/airbyte-integrations/bases/base/build.gradle
+++ /dev/null
@@ -1,3 +0,0 @@
-plugins {
-    id 'airbyte-docker-legacy'
-}
diff --git a/airbyte-integrations/scripts/data-lowcode-connectors.sh b/airbyte-integrations/scripts/data-lowcode-connectors.sh
deleted file mode 100755
index dca8891163125..0000000000000
--- a/airbyte-integrations/scripts/data-lowcode-connectors.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/bash
-
-set -e
-
-if [[ `git status --porcelain` ]]; then
-  # the working tree is not clean, so bail out before checking out other commits
-  echo ""
-  echo "ERROR: There are changes left to commit!"
- echo "" - exit 1 -fi - -BRANCH_NAME="$(git symbolic-ref HEAD 2>/dev/null)" || -BRANCH_NAME="(unnamed branch)" # detached HEAD -BRANCH_NAME=${BRANCH_NAME##refs/heads/} - -OUTPUT_FILE="num_lowcode_connectors.csv" -echo "date,num_lowcode_connectors,num_python_connectors" > $OUTPUT_FILE - -# get every date between sep 1 and today (so we can keep consistent results when generating this sheet) -dates=$(python << EOM -from datetime import date, timedelta - -start_date = date(2022, 10, 1) -end_date = date.today() -delta = timedelta(days=1) -results = [] -while start_date <= end_date: - results.append(start_date.strftime("%Y-%m-%d")) - start_date += delta - -print(" ".join(results)) -EOM -) - -for d in $dates -do -git checkout $(git rev-list -n 1 --first-parent --before="$d" master) - -# count how many lowcode connectors there are - -num_lowcode=$(python << EOM -import os - -connectors = [f.path for f in os.scandir("airbyte-integrations/connectors/") if f.is_dir()] -declarative_connectors = [] -num_python_connectors = 0 -connectors_file = "lowcode_connector_names.txt" -open(connectors_file, "w").write("") -for full_path in connectors: - files = os.listdir(full_path) - connector_name = full_path.split("/")[-1] - # clear the file so the last day is the only one that writes to it - python_files = [x for x in files if ".py" in x] - if len(python_files) > 0: - sourcepy_dir = f"{full_path}/{connector_name.replace('-','_')}/source.py" - try: - sourcepy = open(sourcepy_dir, "r").read() - if "declarative YAML" in sourcepy: - declarative_connectors.append(full_path) - open(connectors_file, "a").write(connector_name + "\n") - else: - num_python_connectors += 1 - except FileNotFoundError: - pass - #print(f"Couldn't find a source.py in {sourcepy_dir}. Skipping.") -print(f"{len(declarative_connectors)},{num_python_connectors}") -EOM -) - -# print with date -echo $d,$num_lowcode >> $OUTPUT_FILE -done - - - -git checkout $BRANCH_NAME -git checkout -- . - -#uncomment to upload to GCS -#gcloud storage cp num_lowcode_connectors.csv gs://sherif-airbyte-metabase-backing-bucket/ \ No newline at end of file diff --git a/airbyte-integrations/scripts/utils.sh b/airbyte-integrations/scripts/utils.sh deleted file mode 100644 index 3c8d1bb0fdf7b..0000000000000 --- a/airbyte-integrations/scripts/utils.sh +++ /dev/null @@ -1,26 +0,0 @@ -die () { - echo "$1" 1>&2 - exit 1 -} - -readlink_f () { - # https://stackoverflow.com/a/1116890 - TARGET_FILE=$1 - - cd "$(dirname $TARGET_FILE)" - TARGET_FILE="$(basename $TARGET_FILE)" - - # Iterate down a (possible) chain of symlinks - while [ -L "$TARGET_FILE" ] - do - TARGET_FILE="$(readlink $TARGET_FILE)" - cd "$(dirname $TARGET_FILE)" - TARGET_FILE="$(basename $TARGET_FILE)" - done - - # Compute the canonicalized name by finding the physical path - # for the directory we're in and appending the target file. - PHYS_DIR="$(pwd -P)" - RESULT="$PHYS_DIR/$TARGET_FILE" - echo "$RESULT" -}